xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 772ec989c7718bb40cfbe6762601ac78951cc001)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
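/*
   Usage sketch (not from this file; the per-row counts 5 and 2 are illustrative
   assumptions): calling both preallocation routines, as recommended above, lets the
   same code run unchanged on one or many processes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        (used when size == 1)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); (used when size > 1)
*/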
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
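/*
   Usage sketch (assumes the application calls MatSetFromOptions() on its matrix):

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   then run with -mat_type aijcrl to select this format at run time.
*/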
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
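/* Builds, in keptrows, an index set of the locally owned rows that contain at least one
   stored nonzero value, scanning both the diagonal (A) and off-diagonal (B) blocks;
   *keptrows is left NULL when no process has a row of all zeros. */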
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
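/* Computes the requested norm of every global column: contributions from the diagonal
   block land at column cmap->rstart + local index, off-diagonal contributions map through
   garray; the per-process partial results are combined with MPI_Allreduce (MAX for
   NORM_INFINITY, SUM otherwise), and square roots are taken at the end for NORM_2. */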
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
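/*
   Call sketch with hypothetical names (gA holds the global matrix values on rank 0,
   though the Mat object must exist on every process since its block sizes are queried
   collectively; m is the number of rows this process is to own and must be an explicit
   count, as it is allgathered to build the row ownership ranges):

     ierr = MatDistribute_MPIAIJ(comm,gA,m,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);
*/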
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the diagonal and off-diagonal nonzero counts for each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the diagonal and off-diagonal nonzero counts for each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each process
350 holds an order-N integer array) but is fast to access.
351 */
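/*
   Lookup sketch, mirroring the uses later in this file: map a global column gcol to its
   local column lcol in the off-diagonal block (lcol ends up negative when gcol is absent).

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/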
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
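/*
   The two macros below insert (row,col,value) into the diagonal (A) and off-diagonal (B)
   sequential blocks, respectively. Each narrows the search range with a short binary
   search, then scans linearly; if the entry is absent and new nonzeros are allowed, the
   row is (re)allocated as needed and later entries are shifted up to make room.
*/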
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
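/* Replaces the numerical values of one locally owned row: v holds the row's values in
   ascending global column order, so they are split into the off-diagonal entries left of
   the diagonal block, then the diagonal-block entries, then the off-diagonal entries to
   its right. */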
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves so that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
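/* Each process may pass in arbitrary global rows; a PetscSF whose leaves are the supplied
   rows and whose roots are the owned rows delivers every row number to its owner, after
   which the zeroing (and optional placement of diag on the diagonal) is done locally. */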
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscMPIInt    size   = mat->size;
748   PetscSF        sf;
749   PetscInt      *lrows;
750   PetscSFNode   *rrows;
751   PetscInt       lastidx = -1, r, p = 0, len = 0;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   /* Create SF where leaves are input rows and roots are owned rows */
756   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
757   for (r = 0; r < n; ++r) lrows[r] = -1;
758   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
759   for (r = 0; r < N; ++r) {
760     const PetscInt idx   = rows[r];
761     PetscBool      found = PETSC_FALSE;
762     /* Trick for efficient searching for sorted rows */
763     if (lastidx > idx) p = 0;
764     lastidx = idx;
765     for (; p < size; ++p) {
766       if (idx >= owners[p] && idx < owners[p+1]) {
767         rrows[r].rank  = p;
768         rrows[r].index = rows[r] - owners[p];
769         found = PETSC_TRUE;
770         break;
771       }
772     }
773     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
774   }
775   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
776   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
777   /* Collect flags for rows to be zeroed */
778   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
779   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
780   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
781   /* Compress and put in row numbers */
782   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
783   /* fix right hand side if needed */
784   if (x && b) {
785     const PetscScalar *xx;
786     PetscScalar       *bb;
787 
788     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
789     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
790     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
791     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
793   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
794   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
795   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
796   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
823 {
824   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
825   PetscErrorCode    ierr;
826   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
827   PetscInt          i,j,r,m,p = 0,len = 0;
828   PetscInt          *lrows,*owners = A->rmap->range;
829   PetscSFNode       *rrows;
830   PetscSF           sf;
831   const PetscScalar *xx;
832   PetscScalar       *bb,*mask;
833   Vec               xmask,lmask;
834   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
835   const PetscInt    *aj, *ii,*ridx;
836   PetscScalar       *aa;
837 #if defined(PETSC_DEBUG)
838   PetscBool found = PETSC_FALSE;
839 #endif
840 
841   PetscFunctionBegin;
842   /* Create SF where leaves are input rows and roots are owned rows */
843   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
844   for (r = 0; r < n; ++r) lrows[r] = -1;
845   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
846   for (r = 0; r < N; ++r) {
847     const PetscInt idx   = rows[r];
848     PetscBool      found = PETSC_FALSE;
849     /* Trick for efficient searching for sorted rows */
850     if (lastidx > idx) p = 0;
851     lastidx = idx;
852     for (; p < size; ++p) {
853       if (idx >= owners[p] && idx < owners[p+1]) {
854         rrows[r].rank  = p;
855         rrows[r].index = rows[r] - owners[p];
856         found = PETSC_TRUE;
857         break;
858       }
859     }
860     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
861   }
862   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
863   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
864   /* Collect flags for rows to be zeroed */
865   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
866   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
867   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
868   /* Compress and put in row numbers */
869   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
870   /* zero diagonal part of matrix */
871   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
872   /* handle the off-diagonal part of the matrix: build a 0/1 mask of the zeroed rows, scatter it into the ghost (lvec) layout, then zero every off-diagonal entry whose column is masked */
873   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
874   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
875   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
876   for (i=0; i<len; i++) bb[lrows[i]] = 1;
877   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
878   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
881   if (x) {
882     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
885     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
886   }
887   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
888   /* remove zeroed rows of off diagonal matrix */
889   ii = aij->i;
890   for (i=0; i<len; i++) {
891     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
892   }
893   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
894   if (aij->compressedrow.use) {
895     m    = aij->compressedrow.nrows;
896     ii   = aij->compressedrow.i;
897     ridx = aij->compressedrow.rindex;
898     for (i=0; i<m; i++) {
899       n  = ii[i+1] - ii[i];
900       aj = aij->j + ii[i];
901       aa = aij->a + ii[i];
902 
903       for (j=0; j<n; j++) {
904         if (PetscAbsScalar(mask[*aj])) {
905           if (b) bb[*ridx] -= *aa*xx[*aj];
906           *aa = 0.0;
907         }
908         aa++;
909         aj++;
910       }
911       ridx++;
912     }
913   } else { /* do not use compressed row format */
914     m = l->B->rmap->n;
915     for (i=0; i<m; i++) {
916       n  = ii[i+1] - ii[i];
917       aj = aij->j + ii[i];
918       aa = aij->a + ii[i];
919       for (j=0; j<n; j++) {
920         if (PetscAbsScalar(mask[*aj])) {
921           if (b) bb[i] -= *aa*xx[*aj];
922           *aa = 0.0;
923         }
924         aa++;
925         aj++;
926       }
927     }
928   }
929   if (x) {
930     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
931     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
932   }
933   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
934   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
935   ierr = PetscFree(lrows);CHKERRQ(ierr);
936 
937   /* only change matrix nonzero state if pattern was allowed to be changed */
938   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
939     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
940     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_MPIAIJ"
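/* y = A*x: the forward scatter of the needed ghost values of x is overlapped with the
   multiply by the diagonal block; the off-diagonal block's contribution is then added
   from the local ghost vector lvec. */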
947 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
948 {
949   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
950   PetscErrorCode ierr;
951   PetscInt       nt;
952 
953   PetscFunctionBegin;
954   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
955   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
956   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
957   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
958   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
965 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
966 {
967   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
968   PetscErrorCode ierr;
969 
970   PetscFunctionBegin;
971   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMultAdd_MPIAIJ"
977 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
985   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
992 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
995   PetscErrorCode ierr;
996   PetscBool      merged;
997 
998   PetscFunctionBegin;
999   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1000   /* do nondiagonal part */
1001   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1002   if (!merged) {
1003     /* send it on its way */
1004     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1005     /* do local part */
1006     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007     /* receive remote parts: note this assumes the values are not actually
1008        added into yy until the next line */
1009     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   } else {
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* send it on its way */
1014     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1015     /* the values were actually received in the Begin() but we still need to call this no-op End() */
1016     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1023 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1024 {
1025   MPI_Comm       comm;
1026   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1027   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1028   IS             Me,Notme;
1029   PetscErrorCode ierr;
1030   PetscInt       M,N,first,last,*notme,i;
1031   PetscMPIInt    size;
1032 
1033   PetscFunctionBegin;
1034   /* Easy test: symmetric diagonal block */
1035   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1036   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1037   if (!*f) PetscFunctionReturn(0);
1038   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1040   if (size == 1) PetscFunctionReturn(0);
1041 
1042   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1043   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1044   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1045   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1046   for (i=0; i<first; i++) notme[i] = i;
1047   for (i=last; i<M; i++) notme[i-last+first] = i;
1048   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1049   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1050   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1051   Aoff = Aoffs[0];
1052   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1053   Boff = Boffs[0];
1054   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1055   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1056   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1057   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1058   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1059   ierr = PetscFree(notme);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1065 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1066 {
1067   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   /* do nondiagonal part */
1072   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1073   /* send it on its way */
1074   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075   /* do local part */
1076   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1077   /* receive remote parts */
1078   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1088 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1089 {
1090   PetscErrorCode ierr;
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092 
1093   PetscFunctionBegin;
1094   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1095   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1096   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatScale_MPIAIJ"
1102 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1109   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatDestroy_MPIAIJ"
1115 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1116 {
1117   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121 #if defined(PETSC_USE_LOG)
1122   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1123 #endif
1124   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1125   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1126   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1127   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1128 #if defined(PETSC_USE_CTABLE)
1129   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1130 #else
1131   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1132 #endif
1133   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1134   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1135   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1136   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1137   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1138   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1139 
1140   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1154 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1155 {
1156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1157   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1158   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1159   PetscErrorCode ierr;
1160   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1161   int            fd;
1162   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1163   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1164   PetscScalar    *column_values;
1165   PetscInt       message_count,flowcontrolcount;
1166   FILE           *file;
1167 
1168   PetscFunctionBegin;
1169   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1171   nz   = A->nz + B->nz;
1172   if (!rank) {
1173     header[0] = MAT_FILE_CLASSID;
1174     header[1] = mat->rmap->N;
1175     header[2] = mat->cmap->N;
1176 
1177     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1178     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1179     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     /* get largest number of rows any processor has */
1181     rlen  = mat->rmap->n;
1182     range = mat->rmap->range;
1183     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1184   } else {
1185     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186     rlen = mat->rmap->n;
1187   }
1188 
1189   /* load up the local row counts */
1190   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1191   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1192 
1193   /* store the row lengths to the file */
1194   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1195   if (!rank) {
1196     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1197     for (i=1; i<size; i++) {
1198       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1199       rlen = range[i+1] - range[i];
1200       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1201       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1202     }
1203     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1204   } else {
1205     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1206     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1207     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1208   }
1209   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1210 
1211   /* load up the local column indices */
1212   nzmax = nz; /* process 0 needs as much space as the largest process needs */
1213   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1214   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1215   cnt   = 0;
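  /* merge each row in ascending global column order: off-diagonal columns left of the
     diagonal block, then the diagonal block shifted by cstart, then the remaining
     off-diagonal columns */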
1216   for (i=0; i<mat->rmap->n; i++) {
1217     for (j=B->i[i]; j<B->i[i+1]; j++) {
1218       if ((col = garray[B->j[j]]) > cstart) break;
1219       column_indices[cnt++] = col;
1220     }
1221     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1222     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1223   }
1224   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1225 
1226   /* store the column indices to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     MPI_Status status;
1230     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     for (i=1; i<size; i++) {
1232       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1233       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1234       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1235       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     }
1238     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1239   } else {
1240     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1241     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1244   }
1245   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1246 
1247   /* load up the local column values */
1248   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1249   cnt  = 0;
1250   for (i=0; i<mat->rmap->n; i++) {
1251     for (j=B->i[i]; j<B->i[i+1]; j++) {
1252       if (garray[B->j[j]] > cstart) break;
1253       column_values[cnt++] = B->a[j];
1254     }
1255     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1256     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1257   }
1258   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1259 
1260   /* store the column values to the file */
1261   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1262   if (!rank) {
1263     MPI_Status status;
1264     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1265     for (i=1; i<size; i++) {
1266       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1267       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1268       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1269       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     }
1272     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1273   } else {
1274     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1275     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1277     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1278   }
1279   ierr = PetscFree(column_values);CHKERRQ(ierr);
1280 
1281   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1282   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1283   PetscFunctionReturn(0);
1284 }
1285 
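/*
   MatView_MPIAIJ_ASCIIorDraworSocket() handles the ASCII info formats directly and,
   for a full printout, drawing, or socket viewing, first gathers the entire matrix
   onto process 0.  A minimal usage sketch (error checking omitted):

       Mat         A;                an assembled MATMPIAIJ matrix
       PetscViewer viewer;
       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"mat.txt",&viewer);
       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO);
       MatView(A,viewer);            dispatches to MatView_MPIAIJ()
       PetscViewerDestroy(&viewer);
*/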
1286 #include <petscdraw.h>
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1289 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1292   PetscErrorCode    ierr;
1293   PetscMPIInt       rank = aij->rank,size = aij->size;
1294   PetscBool         isdraw,iascii,isbinary;
1295   PetscViewer       sviewer;
1296   PetscViewerFormat format;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1300   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1301   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1302   if (iascii) {
1303     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1304     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1305       MatInfo   info;
1306       PetscBool inodes;
1307 
1308       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1309       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1310       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1312       if (!inodes) {
1313         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1314                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1315       } else {
1316         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1317                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1318       }
1319       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1321       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1323       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1326       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1327       PetscFunctionReturn(0);
1328     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1329       PetscInt inodecount,inodelimit,*inodes;
1330       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1331       if (inodes) {
1332         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1333       } else {
1334         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1335       }
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1338       PetscFunctionReturn(0);
1339     }
1340   } else if (isbinary) {
1341     if (size == 1) {
1342       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1343       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1344     } else {
1345       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1346     }
1347     PetscFunctionReturn(0);
1348   } else if (isdraw) {
1349     PetscDraw draw;
1350     PetscBool isnull;
1351     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1352     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1353   }
1354 
1355   {
1356     /* assemble the entire matrix onto first processor. */
1357     Mat        A;
1358     Mat_SeqAIJ *Aloc;
1359     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1360     MatScalar  *a;
1361 
1362     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1363     if (!rank) {
1364       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1365     } else {
1366       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1367     }
1368     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1369     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1370     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1371     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1372     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1373 
1374     /* copy over the A part */
1375     Aloc = (Mat_SeqAIJ*)aij->A->data;
1376     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1377     row  = mat->rmap->rstart;
1378     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1379     for (i=0; i<m; i++) {
1380       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1381       row++;
1382       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1383     }
1384     aj = Aloc->j;
1385     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1386 
1387     /* copy over the B part */
1388     Aloc = (Mat_SeqAIJ*)aij->B->data;
1389     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1390     row  = mat->rmap->rstart;
1391     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1392     ct   = cols;
1393     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1394     for (i=0; i<m; i++) {
1395       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1396       row++;
1397       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1398     }
1399     ierr = PetscFree(ct);CHKERRQ(ierr);
1400     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1401     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402     /*
1403        Every process must participate in drawing the matrix since the graphics waits are
1404        synchronized across all processes that share the PetscDraw object
1405     */
1406     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1407     if (!rank) {
1408       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1409     }
1410     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1411     ierr = MatDestroy(&A);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatView_MPIAIJ"
1418 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1419 {
1420   PetscErrorCode ierr;
1421   PetscBool      iascii,isdraw,issocket,isbinary;
1422 
1423   PetscFunctionBegin;
1424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1426   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1427   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1428   if (iascii || isdraw || isbinary || issocket) {
1429     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
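/*
   MatSOR_MPIAIJ applies SOR to the local diagonal block only; coupling to
   off-process unknowns (the B part) is lagged by one outer iteration.  Each
   outer iteration scatters the current x into the ghosted vector lvec and
   performs a local sweep with the modified right-hand side

       bb1 = bb - B*lvec

   so the method is block Jacobi between processes with SOR inside each block.
*/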
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSOR_MPIAIJ"
1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1437 {
1438   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1439   PetscErrorCode ierr;
1440   Vec            bb1 = 0;
1441   PetscBool      hasop;
1442 
1443   PetscFunctionBegin;
1444   if (flag == SOR_APPLY_UPPER) {
1445     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1446     PetscFunctionReturn(0);
1447   }
1448 
1449   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1450     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1451   }
1452 
1453   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1454     if (flag & SOR_ZERO_INITIAL_GUESS) {
1455       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1456       its--;
1457     }
1458 
1459     while (its--) {
1460       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462 
1463       /* update rhs: bb1 = bb - B*x */
1464       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1465       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1466 
1467       /* local sweep */
1468       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1469     }
1470   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1471     if (flag & SOR_ZERO_INITIAL_GUESS) {
1472       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1473       its--;
1474     }
1475     while (its--) {
1476       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478 
1479       /* update rhs: bb1 = bb - B*x */
1480       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1481       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1482 
1483       /* local sweep */
1484       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1485     }
1486   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1487     if (flag & SOR_ZERO_INITIAL_GUESS) {
1488       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1489       its--;
1490     }
1491     while (its--) {
1492       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494 
1495       /* update rhs: bb1 = bb - B*x */
1496       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1497       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1498 
1499       /* local sweep */
1500       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1501     }
1502   } else if (flag & SOR_EISENSTAT) {
1503     Vec xx1;
1504 
1505     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
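    /* Eisenstat's trick: a local backward sweep produces xx, the right-hand side is
       corrected as bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec (D the block diagonal),
       and a local forward sweep then produces the correction xx1 added to xx. */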
1506     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1507 
1508     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     if (!mat->diag) {
1511       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1512       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1513     }
1514     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1515     if (hasop) {
1516       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1517     } else {
1518       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1519     }
1520     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1521 
1522     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1523 
1524     /* local sweep */
1525     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1526     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1527     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1528   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1529 
1530   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1531   PetscFunctionReturn(0);
1532 }
1533 
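/*
   MatPermute_MPIAIJ builds B = A(rowp,colp) for row and column permutations given
   as index sets.  The permutations are inverted with PetscSF reductions to find the
   destination of each local row and column, the destination sparsity is counted for
   preallocation, and the entries are then inserted with MatSetValues().  Usage
   sketch (error checking omitted):

       IS  rowp,colp;        parallel index sets describing the permutation
       Mat B;
       MatPermute(A,rowp,colp,&B);
*/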
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatPermute_MPIAIJ"
1536 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1537 {
1538   Mat            aA,aB,Aperm;
1539   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1540   PetscScalar    *aa,*ba;
1541   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1542   PetscSF        rowsf,sf;
1543   IS             parcolp = NULL;
1544   PetscBool      done;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1549   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1550   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1551   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1552 
1553   /* Invert row permutation to find out where my rows should go */
1554   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1555   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1556   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1557   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1558   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1559   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1560 
1561   /* Invert column permutation to find out where my columns should go */
1562   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1563   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1564   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1565   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1566   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1569 
1570   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1571   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1572   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1573 
1574   /* Find out where my gcols should go */
1575   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1576   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1577   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1578   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1579   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1580   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1581   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1583 
1584   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1585   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1586   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1587   for (i=0; i<m; i++) {
1588     PetscInt row = rdest[i],rowner;
1589     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1590     for (j=ai[i]; j<ai[i+1]; j++) {
1591       PetscInt cowner,col = cdest[aj[j]];
1592       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1593       if (rowner == cowner) dnnz[i]++;
1594       else onnz[i]++;
1595     }
1596     for (j=bi[i]; j<bi[i+1]; j++) {
1597       PetscInt cowner,col = gcdest[bj[j]];
1598       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1599       if (rowner == cowner) dnnz[i]++;
1600       else onnz[i]++;
1601     }
1602   }
1603   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1604   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1607   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1608 
1609   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1610   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1611   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1612   for (i=0; i<m; i++) {
1613     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1614     PetscInt j0,rowlen;
1615     rowlen = ai[i+1] - ai[i];
1616     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed acols/bcols work arrays, so insert values in batches of at most m */
1617       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1618       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1619     }
1620     rowlen = bi[i+1] - bi[i];
1621     for (j0=j=0; j<rowlen; j0=j) {
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625   }
1626   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1627   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1629   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1630   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1631   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1632   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1633   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1634   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1635   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1636   *B = Aperm;
1637   PetscFunctionReturn(0);
1638 }
1639 
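/*
   MatGetInfo_MPIAIJ sums the statistics of the diagonal (A) and off-diagonal (B)
   blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the five local counters are
   combined across the communicator with a single MPI_Allreduce().  Usage sketch:

       MatInfo info;
       MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
       ... info.nz_used now holds the total number of stored nonzeros ...
*/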
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1642 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1643 {
1644   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1645   Mat            A    = mat->A,B = mat->B;
1646   PetscErrorCode ierr;
1647   PetscReal      isend[5],irecv[5];
1648 
1649   PetscFunctionBegin;
1650   info->block_size = 1.0;
1651   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1654   isend[3] = info->memory;  isend[4] = info->mallocs;
1655 
1656   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1657 
1658   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1659   isend[3] += info->memory;  isend[4] += info->mallocs;
1660   if (flag == MAT_LOCAL) {
1661     info->nz_used      = isend[0];
1662     info->nz_allocated = isend[1];
1663     info->nz_unneeded  = isend[2];
1664     info->memory       = isend[3];
1665     info->mallocs      = isend[4];
1666   } else if (flag == MAT_GLOBAL_MAX) {
1667     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1668 
1669     info->nz_used      = irecv[0];
1670     info->nz_allocated = irecv[1];
1671     info->nz_unneeded  = irecv[2];
1672     info->memory       = irecv[3];
1673     info->mallocs      = irecv[4];
1674   } else if (flag == MAT_GLOBAL_SUM) {
1675     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1676 
1677     info->nz_used      = irecv[0];
1678     info->nz_allocated = irecv[1];
1679     info->nz_unneeded  = irecv[2];
1680     info->memory       = irecv[3];
1681     info->mallocs      = irecv[4];
1682   }
1683   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1684   info->fill_ratio_needed = 0;
1685   info->factor_mallocs    = 0;
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatSetOption_MPIAIJ"
1691 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1692 {
1693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   switch (op) {
1698   case MAT_NEW_NONZERO_LOCATIONS:
1699   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1700   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1701   case MAT_KEEP_NONZERO_PATTERN:
1702   case MAT_NEW_NONZERO_LOCATION_ERR:
1703   case MAT_USE_INODES:
1704   case MAT_IGNORE_ZERO_ENTRIES:
1705     MatCheckPreallocated(A,1);
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_ROW_ORIENTED:
1710     a->roworiented = flg;
1711 
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1714     break;
1715   case MAT_NEW_DIAGONALS:
1716     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1717     break;
1718   case MAT_IGNORE_OFF_PROC_ENTRIES:
1719     a->donotstash = flg;
1720     break;
1721   case MAT_SPD:
1722     A->spd_set = PETSC_TRUE;
1723     A->spd     = flg;
1724     if (flg) {
1725       A->symmetric                  = PETSC_TRUE;
1726       A->structurally_symmetric     = PETSC_TRUE;
1727       A->symmetric_set              = PETSC_TRUE;
1728       A->structurally_symmetric_set = PETSC_TRUE;
1729     }
1730     break;
1731   case MAT_SYMMETRIC:
1732     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1733     break;
1734   case MAT_STRUCTURALLY_SYMMETRIC:
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_HERMITIAN:
1738     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_SYMMETRY_ETERNAL:
1741     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
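/*
   MatGetRow_MPIAIJ merges a row of the diagonal block with a row of the
   off-diagonal block (mapped to global columns through garray) so the caller sees
   one row with increasing global column indices.  Only locally owned rows may be
   requested, and MatRestoreRow() must be called before the next MatGetRow().
   Usage sketch (error checking omitted):

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       MatGetRow(mat,row,&ncols,&cols,&vals);
       ... inspect cols[] and vals[] ...
       MatRestoreRow(mat,row,&ncols,&cols,&vals);
*/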
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatGetRow_MPIAIJ"
1751 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1755   PetscErrorCode ierr;
1756   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1757   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1758   PetscInt       *cmap,*idx_p;
1759 
1760   PetscFunctionBegin;
1761   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1762   mat->getrowactive = PETSC_TRUE;
1763 
1764   if (!mat->rowvalues && (idx || v)) {
1765     /*
1766         allocate enough space to hold information from the longest row.
1767     */
1768     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1769     PetscInt   max = 1,tmp;
1770     for (i=0; i<matin->rmap->n; i++) {
1771       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1772       if (max < tmp) max = tmp;
1773     }
1774     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1775   }
1776 
1777   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1778   lrow = row - rstart;
1779 
1780   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1781   if (!v)   {pvA = 0; pvB = 0;}
1782   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1783   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1784   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1785   nztot = nzA + nzB;
1786 
1787   cmap = mat->garray;
1788   if (v  || idx) {
1789     if (nztot) {
1790       /* Sort by increasing column numbers, assuming A and B already sorted */
1791       PetscInt imark = -1;
1792       if (v) {
1793         *v = v_p = mat->rowvalues;
1794         for (i=0; i<nzB; i++) {
1795           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1796           else break;
1797         }
1798         imark = i;
1799         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1800         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1801       }
1802       if (idx) {
1803         *idx = idx_p = mat->rowindices;
1804         if (imark > -1) {
1805           for (i=0; i<imark; i++) {
1806             idx_p[i] = cmap[cworkB[i]];
1807           }
1808         } else {
1809           for (i=0; i<nzB; i++) {
1810             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1811             else break;
1812           }
1813           imark = i;
1814         }
1815         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1816         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1817       }
1818     } else {
1819       if (idx) *idx = 0;
1820       if (v)   *v   = 0;
1821     }
1822   }
1823   *nz  = nztot;
1824   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1825   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1834 
1835   PetscFunctionBegin;
1836   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1837   aij->getrowactive = PETSC_FALSE;
1838   PetscFunctionReturn(0);
1839 }
1840 
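/*
   MatNorm_MPIAIJ: the Frobenius norm is sqrt(sum_ij |a_ij|^2) accumulated over both
   blocks and summed across processes; NORM_1 is the maximum column sum
   max_j sum_i |a_ij|, accumulated into a work array of global length N and reduced
   with MPIU_SUM; NORM_INFINITY is the maximum row sum, reduced with MPIU_MAX.
   The two norm is not supported in parallel.
*/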
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatNorm_MPIAIJ"
1843 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1844 {
1845   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1846   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1847   PetscErrorCode ierr;
1848   PetscInt       i,j,cstart = mat->cmap->rstart;
1849   PetscReal      sum = 0.0;
1850   MatScalar      *v;
1851 
1852   PetscFunctionBegin;
1853   if (aij->size == 1) {
1854     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1855   } else {
1856     if (type == NORM_FROBENIUS) {
1857       v = amat->a;
1858       for (i=0; i<amat->nz; i++) {
1859         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1860       }
1861       v = bmat->a;
1862       for (i=0; i<bmat->nz; i++) {
1863         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1864       }
1865       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1866       *norm = PetscSqrtReal(*norm);
1867     } else if (type == NORM_1) { /* max column norm */
1868       PetscReal *tmp,*tmp2;
1869       PetscInt  *jj,*garray = aij->garray;
1870       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1871       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1872       *norm = 0.0;
1873       v     = amat->a; jj = amat->j;
1874       for (j=0; j<amat->nz; j++) {
1875         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1876       }
1877       v = bmat->a; jj = bmat->j;
1878       for (j=0; j<bmat->nz; j++) {
1879         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1880       }
1881       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1882       for (j=0; j<mat->cmap->N; j++) {
1883         if (tmp2[j] > *norm) *norm = tmp2[j];
1884       }
1885       ierr = PetscFree(tmp);CHKERRQ(ierr);
1886       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1887     } else if (type == NORM_INFINITY) { /* max row norm */
1888       PetscReal ntemp = 0.0;
1889       for (j=0; j<aij->A->rmap->n; j++) {
1890         v   = amat->a + amat->i[j];
1891         sum = 0.0;
1892         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         v = bmat->a + bmat->i[j];
1896         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1897           sum += PetscAbsScalar(*v); v++;
1898         }
1899         if (sum > ntemp) ntemp = sum;
1900       }
1901       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1902     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
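/*
   MatTranspose_MPIAIJ: for MAT_INITIAL_MATRIX the preallocation of B = A^T is
   computed first; counts over local columns give d_nnz directly, while counts over
   off-diagonal columns are routed to their owning processes with a PetscSF
   reduction to give o_nnz.  The entries are then inserted transposed, i.e. with
   MatSetValues(B,ncol,cols,1,&row,...).
*/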
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatTranspose_MPIAIJ"
1909 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1910 {
1911   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1912   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1913   PetscErrorCode ierr;
1914   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1915   PetscInt       cstart = A->cmap->rstart,ncol;
1916   Mat            B;
1917   MatScalar      *array;
1918 
1919   PetscFunctionBegin;
1920   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1921 
1922   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1923   ai = Aloc->i; aj = Aloc->j;
1924   bi = Bloc->i; bj = Bloc->j;
1925   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1926     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1927     PetscSFNode          *oloc;
1928     PETSC_UNUSED PetscSF sf;
1929 
1930     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1931     /* compute d_nnz for preallocation */
1932     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1933     for (i=0; i<ai[ma]; i++) {
1934       d_nnz[aj[i]]++;
1935       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1936     }
1937     /* compute local off-diagonal contributions */
1938     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1939     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1940     /* map those to global */
1941     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1942     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1943     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1944     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1945     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1946     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1947     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1948 
1949     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1950     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1951     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1952     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1953     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1954     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1955   } else {
1956     B    = *matout;
1957     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1958     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1959   }
1960 
1961   /* copy over the A part */
1962   array = Aloc->a;
1963   row   = A->rmap->rstart;
1964   for (i=0; i<ma; i++) {
1965     ncol = ai[i+1]-ai[i];
1966     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1967     row++;
1968     array += ncol; aj += ncol;
1969   }
1970   aj = Aloc->j;
1971   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1972 
1973   /* copy over the B part */
1974   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1975   array = Bloc->a;
1976   row   = A->rmap->rstart;
1977   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1978   cols_tmp = cols;
1979   for (i=0; i<mb; i++) {
1980     ncol = bi[i+1]-bi[i];
1981     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1982     row++;
1983     array += ncol; cols_tmp += ncol;
1984   }
1985   ierr = PetscFree(cols);CHKERRQ(ierr);
1986 
1987   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1990     *matout = B;
1991   } else {
1992     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
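/*
   MatDiagonalScale_MPIAIJ computes mat = diag(ll)*mat*diag(rr).  The scatter of rr
   into the ghosted vector lvec is overlapped with the left scaling of the
   off-diagonal block and the scaling of the diagonal block, so the communication
   hides behind local work.
*/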
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1999 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2000 {
2001   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2002   Mat            a    = aij->A,b = aij->B;
2003   PetscErrorCode ierr;
2004   PetscInt       s1,s2,s3;
2005 
2006   PetscFunctionBegin;
2007   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2008   if (rr) {
2009     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2010     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2011     /* Overlap communication with computation. */
2012     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2013   }
2014   if (ll) {
2015     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2016     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2017     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2018   }
2019   /* scale  the diagonal block */
2020   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2021 
2022   if (rr) {
2023     /* Do a scatter end and then right scale the off-diagonal block */
2024     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2025     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2026   }
2027   PetscFunctionReturn(0);
2028 }
2029 
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2032 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2033 {
2034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2035   PetscErrorCode ierr;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatEqual_MPIAIJ"
2044 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2045 {
2046   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2047   Mat            a,b,c,d;
2048   PetscBool      flg;
2049   PetscErrorCode ierr;
2050 
2051   PetscFunctionBegin;
2052   a = matA->A; b = matA->B;
2053   c = matB->A; d = matB->B;
2054 
2055   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2056   if (flg) {
2057     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2058   }
2059   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatCopy_MPIAIJ"
2065 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2066 {
2067   PetscErrorCode ierr;
2068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2069   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2070 
2071   PetscFunctionBegin;
2072   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2073   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2074     /* because of the column compression in the off-process part of the matrix a->B,
2075        the number of columns in a->B and b->B may differ, hence we cannot call
2076        MatCopy() directly on the two parts. If need be, a copy more efficient than
2077        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2078        and then copying the submatrices */
2079     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2080   } else {
2081     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2082     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatSetUp_MPIAIJ"
2089 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2090 {
2091   PetscErrorCode ierr;
2092 
2093   PetscFunctionBegin;
2094   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2100 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
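/* For example, if (in global numbering) row i of X has columns {1,5} and row i of Y
   has columns {1,3}, the merged pattern is {1,3,5}, so nnz[i] = 3. */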
2101 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2102 {
2103   PetscInt       i,m=Y->rmap->N;
2104   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2105   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2106   const PetscInt *xi = x->i,*yi = y->i;
2107 
2108   PetscFunctionBegin;
2109   /* Set the number of nonzeros in the new matrix */
2110   for (i=0; i<m; i++) {
2111     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2112     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2113     nnz[i] = 0;
2114     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2115       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2116       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2117       nnz[i]++;
2118     }
2119     for (; k<nzy; k++) nnz[i]++;
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
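/*
   MatAXPY_MPIAIJ computes Y = Y + a*X in one of three ways: for
   SAME_NONZERO_PATTERN the stored value arrays are combined directly with BLAS
   axpy; for SUBSET_NONZERO_PATTERN an x-to-y index map is built (and cached) so
   each entry of X is added into the matching slot of Y; otherwise a new matrix
   with the merged nonzero pattern is preallocated and Y is replaced by it.
*/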
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatAXPY_MPIAIJ"
2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2127 {
2128   PetscErrorCode ierr;
2129   PetscInt       i;
2130   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2131   PetscBLASInt   bnz,one=1;
2132   Mat_SeqAIJ     *x,*y;
2133 
2134   PetscFunctionBegin;
2135   if (str == SAME_NONZERO_PATTERN) {
2136     PetscScalar alpha = a;
2137     x    = (Mat_SeqAIJ*)xx->A->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     y    = (Mat_SeqAIJ*)yy->A->data;
2140     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2141     x    = (Mat_SeqAIJ*)xx->B->data;
2142     y    = (Mat_SeqAIJ*)yy->B->data;
2143     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2144     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2145   } else if (str == SUBSET_NONZERO_PATTERN) {
2146     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2147 
2148     x = (Mat_SeqAIJ*)xx->B->data;
2149     y = (Mat_SeqAIJ*)yy->B->data;
2150     if (y->xtoy && y->XtoY != xx->B) {
2151       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2152       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2153     }
2154     if (!y->xtoy) { /* get xtoy */
2155       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2156       y->XtoY = xx->B;
2157       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2158     }
2159     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2160   } else {
2161     Mat      B;
2162     PetscInt *nnz_d,*nnz_o;
2163     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2164     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2165     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2166     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2167     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2168     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2169     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2170     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2171     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2172     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2173     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2174     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2175     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2176     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2177   }
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2182 
2183 #undef __FUNCT__
2184 #define __FUNCT__ "MatConjugate_MPIAIJ"
2185 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2186 {
2187 #if defined(PETSC_USE_COMPLEX)
2188   PetscErrorCode ierr;
2189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2190 
2191   PetscFunctionBegin;
2192   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2193   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2194 #else
2195   PetscFunctionBegin;
2196 #endif
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatRealPart_MPIAIJ"
2202 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2209   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2215 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2216 {
2217   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2222   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #if defined(PETSC_HAVE_PBGL)
2227 
2228 #include <boost/parallel/mpi/bsp_process_group.hpp>
2229 #include <boost/graph/distributed/ilu_default_graph.hpp>
2230 #include <boost/graph/distributed/ilu_0_block.hpp>
2231 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2232 #include <boost/graph/distributed/petsc/interface.hpp>
2233 #include <boost/multi_array.hpp>
2234 #include <boost/parallel/distributed_property_map.hpp>
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2238 /*
2239   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2240 */
2241 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2242 {
2243   namespace petsc = boost::distributed::petsc;
2244 
2245   namespace graph_dist = boost::graph::distributed;
2246   using boost::graph::distributed::ilu_default::process_group_type;
2247   using boost::graph::ilu_permuted;
2248 
2249   PetscBool      row_identity, col_identity;
2250   PetscContainer c;
2251   PetscInt       m, n, M, N;
2252   PetscErrorCode ierr;
2253 
2254   PetscFunctionBegin;
2255   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2256   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2257   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2258   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2259 
2260   process_group_type pg;
2261   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2262   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2263   lgraph_type& level_graph = *lgraph_p;
2264   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2265 
2266   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2267   ilu_permuted(level_graph);
2268 
2269   /* put together the new matrix */
2270   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2271   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2272   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2273   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2274   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2275   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2276   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2277   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2278 
2279   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2280   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2281   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2282   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2288 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2289 {
2290   PetscFunctionBegin;
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatSolve_MPIAIJ"
2296 /*
2297   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2298 */
2299 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2300 {
2301   namespace graph_dist = boost::graph::distributed;
2302 
2303   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2304   lgraph_type    *lgraph_p;
2305   PetscContainer c;
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2310   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2311   ierr = VecCopy(b, x);CHKERRQ(ierr);
2312 
2313   PetscScalar *array_x;
2314   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2315   PetscInt sx;
2316   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2317 
2318   PetscScalar *array_b;
2319   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2320   PetscInt sb;
2321   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2322 
2323   lgraph_type& level_graph = *lgraph_p;
2324   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2325 
2326   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2327   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2328   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2329 
2330   typedef boost::iterator_property_map<array_ref_type::iterator,
2331                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2332   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2333   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2334 
2335   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2336   PetscFunctionReturn(0);
2337 }
2338 #endif
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatDestroy_MatRedundant"
2342 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2343 {
2344   PetscErrorCode ierr;
2345   Mat_Redundant  *redund;
2346   PetscInt       i;
2347   PetscMPIInt    size;
2348 
2349   PetscFunctionBegin;
2350   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2351   if (size == 1) {
2352     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2353     redund = a->redundant;
2354   } else {
2355     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2356     redund = a->redundant;
2357   }
2358   if (redund) {
2359     if (redund->matseq) { /* via MatGetSubMatrices()  */
2360       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2361       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2362       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2363       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2364     } else {
2365       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2366       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2367       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2368       for (i=0; i<redund->nrecvs; i++) {
2369         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2370         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2371       }
2372       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2373     }
2374 
2375     if (redund->psubcomm) {
2376       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2377     }
2378     ierr = redund->Destroy(A);CHKERRQ(ierr);
2379     ierr = PetscFree(redund);CHKERRQ(ierr);
2380   }
2381   PetscFunctionReturn(0);
2382 }
2383 
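/*
   MatGetRedundantMatrix_MPIAIJ_interlaced assembles a complete copy of mat on each
   of nsubcomm interlaced subcommunicators.  Each process packs its local rows into
   a CSR-like buffer (sbuf_j holds the row pointers followed by the column indices,
   sbuf_a the values) and exchanges it with the processes owning the same row block
   in the other subcommunicators; when size is not divisible by nsubcomm the
   leftover processes forward their rows to the last rank of each full-sized
   subcommunicator.
*/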
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2386 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2387 {
2388   PetscMPIInt    rank,size;
2389   MPI_Comm       comm;
2390   PetscErrorCode ierr;
2391   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2392   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2393   PetscInt       *rowrange = mat->rmap->range;
2394   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2395   Mat            A = aij->A,B=aij->B,C=*matredundant;
2396   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2397   PetscScalar    *sbuf_a;
2398   PetscInt       nzlocal=a->nz+b->nz;
2399   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2400   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2401   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2402   MatScalar      *aworkA,*aworkB;
2403   PetscScalar    *vals;
2404   PetscMPIInt    tag1,tag2,tag3,imdex;
2405   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2406   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2407   MPI_Status     recv_status,*send_status;
2408   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2409   PetscInt       **rbuf_j=NULL;
2410   PetscScalar    **rbuf_a=NULL;
2411   Mat_Redundant  *redund =NULL;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2417   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2419 
2420   if (reuse == MAT_REUSE_MATRIX) {
2421     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2422     if (subsize == 1) {
2423       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2424       redund = c->redundant;
2425     } else {
2426       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2427       redund = c->redundant;
2428     }
2429     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2430 
2431     nsends    = redund->nsends;
2432     nrecvs    = redund->nrecvs;
2433     send_rank = redund->send_rank;
2434     recv_rank = redund->recv_rank;
2435     sbuf_nz   = redund->sbuf_nz;
2436     rbuf_nz   = redund->rbuf_nz;
2437     sbuf_j    = redund->sbuf_j;
2438     sbuf_a    = redund->sbuf_a;
2439     rbuf_j    = redund->rbuf_j;
2440     rbuf_a    = redund->rbuf_a;
2441   }
2442 
2443   if (reuse == MAT_INITIAL_MATRIX) {
2444     PetscInt    nleftover,np_subcomm;
2445 
2446     /* get the destination processors' id send_rank, nsends and nrecvs */
2447     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2448 
2449     np_subcomm = size/nsubcomm;
2450     nleftover  = size - nsubcomm*np_subcomm;
2451 
2452     /* the block of code below is specific to the INTERLACED layout */
2453     /* ------------------------------------------------*/
2454     nsends = 0; nrecvs = 0;
2455     for (i=0; i<size; i++) {
2456       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2457         send_rank[nsends++] = i;
2458         recv_rank[nrecvs++] = i;
2459       }
2460     }
2461     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2462       i = size-nleftover-1;
2463       j = 0;
2464       while (j < nsubcomm - nleftover) {
2465         send_rank[nsends++] = i;
2466         i--; j++;
2467       }
2468     }
2469 
2470     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2471       for (i=0; i<nleftover; i++) {
2472         recv_rank[nrecvs++] = size-nleftover+i;
2473       }
2474     }
2475     /*----------------------------------------------*/
2476 
2477     /* allocate sbuf_j, sbuf_a */
2478     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2479     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2480     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2481     /*
2482     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2483     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2484      */
2485   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2486 
2487   /* copy mat's local entries into the buffers */
2488   if (reuse == MAT_INITIAL_MATRIX) {
2489     rownz_max = 0;
2490     rptr      = sbuf_j;
2491     cols      = sbuf_j + rend-rstart + 1;
2492     vals      = sbuf_a;
2493     rptr[0]   = 0;
2494     for (i=0; i<rend-rstart; i++) {
2495       row    = i + rstart;
2496       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2497       ncols  = nzA + nzB;
2498       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2499       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2500       /* load the column indices for this row into cols */
2501       lwrite = 0;
2502       for (l=0; l<nzB; l++) {
2503         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2504           vals[lwrite]   = aworkB[l];
2505           cols[lwrite++] = ctmp;
2506         }
2507       }
2508       for (l=0; l<nzA; l++) {
2509         vals[lwrite]   = aworkA[l];
2510         cols[lwrite++] = cstart + cworkA[l];
2511       }
2512       for (l=0; l<nzB; l++) {
2513         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2514           vals[lwrite]   = aworkB[l];
2515           cols[lwrite++] = ctmp;
2516         }
2517       }
2518       vals     += ncols;
2519       cols     += ncols;
2520       rptr[i+1] = rptr[i] + ncols;
2521       if (rownz_max < ncols) rownz_max = ncols;
2522     }
2523     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2524   } else { /* only copy matrix values into sbuf_a */
2525     rptr    = sbuf_j;
2526     vals    = sbuf_a;
2527     rptr[0] = 0;
2528     for (i=0; i<rend-rstart; i++) {
2529       row    = i + rstart;
2530       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2531       ncols  = nzA + nzB;
2532       cworkB = b->j + b->i[i];
2533       aworkA = a->a + a->i[i];
2534       aworkB = b->a + b->i[i];
2535       lwrite = 0;
2536       for (l=0; l<nzB; l++) {
2537         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2538       }
2539       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2540       for (l=0; l<nzB; l++) {
2541         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2542       }
2543       vals     += ncols;
2544       rptr[i+1] = rptr[i] + ncols;
2545     }
2546   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2547 
2548   /* send nzlocal to others, and recv other's nzlocal */
2549   /*--------------------------------------------------*/
2550   if (reuse == MAT_INITIAL_MATRIX) {
2551     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2552 
2553     s_waits2 = s_waits3 + nsends;
2554     s_waits1 = s_waits2 + nsends;
2555     r_waits1 = s_waits1 + nsends;
2556     r_waits2 = r_waits1 + nrecvs;
2557     r_waits3 = r_waits2 + nrecvs;
2558   } else {
2559     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2560 
2561     r_waits3 = s_waits3 + nsends;
2562   }
2563 
2564   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2565   if (reuse == MAT_INITIAL_MATRIX) {
2566     /* get new tags to keep the communication clean */
2567     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2568     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2569     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2570 
2571     /* post receives of other's nzlocal */
2572     for (i=0; i<nrecvs; i++) {
2573       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2574     }
2575     /* send nzlocal to others */
2576     for (i=0; i<nsends; i++) {
2577       sbuf_nz[i] = nzlocal;
2578       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2579     }
2580     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2581     count = nrecvs;
2582     while (count) {
2583       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2584 
2585       recv_rank[imdex] = recv_status.MPI_SOURCE;
2586       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2587       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2588 
2589       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2590 
2591       rbuf_nz[imdex] += i + 2;
2592 
2593       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2594       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2595       count--;
2596     }
2597     /* wait on sends of nzlocal */
2598     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2599     /* send mat->i,j to others, and recv from others */
2600     /*------------------------------------------------*/
2601     for (i=0; i<nsends; i++) {
2602       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2603       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2604     }
2605     /* wait on receives of mat->i,j */
2606     /*------------------------------*/
2607     count = nrecvs;
2608     while (count) {
2609       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2610       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2611       count--;
2612     }
2613     /* wait on sends of mat->i,j */
2614     /*---------------------------*/
2615     if (nsends) {
2616       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2617     }
2618   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2619 
2620   /* post receives, send and receive mat->a */
2621   /*----------------------------------------*/
2622   for (imdex=0; imdex<nrecvs; imdex++) {
2623     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2624   }
2625   for (i=0; i<nsends; i++) {
2626     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2627   }
2628   count = nrecvs;
2629   while (count) {
2630     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2631     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2632     count--;
2633   }
2634   if (nsends) {
2635     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2636   }
2637 
2638   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2639 
2640   /* create redundant matrix */
2641   /*-------------------------*/
2642   if (reuse == MAT_INITIAL_MATRIX) {
2643     const PetscInt *range;
2644     PetscInt       rstart_sub,rend_sub,mloc_sub;
2645 
2646     /* compute rownz_max for preallocation */
2647     for (imdex=0; imdex<nrecvs; imdex++) {
2648       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2649       rptr = rbuf_j[imdex];
2650       for (i=0; i<j; i++) {
2651         ncols = rptr[i+1] - rptr[i];
2652         if (rownz_max < ncols) rownz_max = ncols;
2653       }
2654     }
2655 
2656     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2657 
2658     /* get local size of redundant matrix
2659        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2660     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2661     rstart_sub = range[nsubcomm*subrank];
2662     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2663       rend_sub = range[nsubcomm*(subrank+1)];
2664     } else {
2665       rend_sub = mat->rmap->N;
2666     }
2667     mloc_sub = rend_sub - rstart_sub;
2668 
2669     if (M == N) {
2670       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2671     } else { /* non-square matrix */
2672       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2673     }
2674     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2675     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2676     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2677     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2678   } else {
2679     C = *matredundant;
2680   }
2681 
2682   /* insert local matrix entries */
2683   rptr = sbuf_j;
2684   cols = sbuf_j + rend-rstart + 1;
2685   vals = sbuf_a;
2686   for (i=0; i<rend-rstart; i++) {
2687     row   = i + rstart;
2688     ncols = rptr[i+1] - rptr[i];
2689     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2690     vals += ncols;
2691     cols += ncols;
2692   }
2693   /* insert received matrix entries */
2694   for (imdex=0; imdex<nrecvs; imdex++) {
2695     rstart = rowrange[recv_rank[imdex]];
2696     rend   = rowrange[recv_rank[imdex]+1];
2697     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2698     rptr   = rbuf_j[imdex];
2699     cols   = rbuf_j[imdex] + rend-rstart + 1;
2700     vals   = rbuf_a[imdex];
2701     for (i=0; i<rend-rstart; i++) {
2702       row   = i + rstart;
2703       ncols = rptr[i+1] - rptr[i];
2704       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2705       vals += ncols;
2706       cols += ncols;
2707     }
2708   }
2709   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711 
2712   if (reuse == MAT_INITIAL_MATRIX) {
2713     *matredundant = C;
2714 
2715     /* create a supporting struct and attach it to C for reuse */
2716     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2717     if (subsize == 1) {
2718       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2719       c->redundant = redund;
2720     } else {
2721       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2722       c->redundant = redund;
2723     }
2724 
2725     redund->nzlocal   = nzlocal;
2726     redund->nsends    = nsends;
2727     redund->nrecvs    = nrecvs;
2728     redund->send_rank = send_rank;
2729     redund->recv_rank = recv_rank;
2730     redund->sbuf_nz   = sbuf_nz;
2731     redund->rbuf_nz   = rbuf_nz;
2732     redund->sbuf_j    = sbuf_j;
2733     redund->sbuf_a    = sbuf_a;
2734     redund->rbuf_j    = rbuf_j;
2735     redund->rbuf_a    = rbuf_a;
2736     redund->psubcomm  = NULL;
2737 
2738     redund->Destroy = C->ops->destroy;
2739     C->ops->destroy = MatDestroy_MatRedundant;
2740   }
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2746 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2747 {
2748   PetscErrorCode ierr;
2749   MPI_Comm       comm;
2750   PetscMPIInt    size,subsize;
2751   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2752   Mat_Redundant  *redund=NULL;
2753   PetscSubcomm   psubcomm=NULL;
2754   MPI_Comm       subcomm_in=subcomm;
2755   Mat            *matseq;
2756   IS             isrow,iscol;
2757 
2758   PetscFunctionBegin;
2759   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2760     if (reuse ==  MAT_INITIAL_MATRIX) {
2761       /* create psubcomm, then get subcomm */
2762       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2763       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2764       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2765 
2766       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2767       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2768       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2769       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2770       subcomm = psubcomm->comm;
2771     } else { /* retrieve psubcomm and subcomm */
2772       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2773       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2774       if (subsize == 1) {
2775         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2776         redund = c->redundant;
2777       } else {
2778         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2779         redund = c->redundant;
2780       }
2781       psubcomm = redund->psubcomm;
2782     }
2783     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2784       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2785       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2786         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2787         if (subsize == 1) {
2788           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2789           c->redundant->psubcomm = psubcomm;
2790         } else {
2791           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2792           c->redundant->psubcomm = psubcomm;
2793         }
2794       }
2795       PetscFunctionReturn(0);
2796     }
2797   }
2798 
2799   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2800   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2801   if (reuse == MAT_INITIAL_MATRIX) {
2802     /* create a local sequential matrix matseq[0] */
2803     mloc_sub = PETSC_DECIDE;
2804     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2805     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2806     rstart = rend - mloc_sub;
2807     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2808     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2809   } else { /* reuse == MAT_REUSE_MATRIX */
2810     if (subsize == 1) {
2811       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2812       redund = c->redundant;
2813     } else {
2814       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2815       redund = c->redundant;
2816     }
2817 
2818     isrow  = redund->isrow;
2819     iscol  = redund->iscol;
2820     matseq = redund->matseq;
2821   }
2822   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2823   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2824 
2825   if (reuse == MAT_INITIAL_MATRIX) {
2826     /* create a supporting struct and attach it to C for reuse */
2827     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2828     if (subsize == 1) {
2829       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2830       c->redundant = redund;
2831     } else {
2832       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2833       c->redundant = redund;
2834     }
2835     redund->isrow    = isrow;
2836     redund->iscol    = iscol;
2837     redund->matseq   = matseq;
2838     redund->psubcomm = psubcomm;
2839     redund->Destroy               = (*matredundant)->ops->destroy;
2840     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2841   }
2842   PetscFunctionReturn(0);
2843 }
2844 
2845 #undef __FUNCT__
2846 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2847 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2848 {
2849   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2850   PetscErrorCode ierr;
2851   PetscInt       i,*idxb = 0;
2852   PetscScalar    *va,*vb;
2853   Vec            vtmp;
2854 
2855   PetscFunctionBegin;
2856   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2857   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2858   if (idx) {
2859     for (i=0; i<A->rmap->n; i++) {
2860       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2861     }
2862   }
2863 
2864   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2865   if (idx) {
2866     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2867   }
2868   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2869   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2870 
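  /* merge the two results: keep whichever entry has the larger magnitude, mapping
     off-diagonal block indices back to global column numbers through garray */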
2871   for (i=0; i<A->rmap->n; i++) {
2872     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2873       va[i] = vb[i];
2874       if (idx) idx[i] = a->garray[idxb[i]];
2875     }
2876   }
2877 
2878   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2879   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2880   ierr = PetscFree(idxb);CHKERRQ(ierr);
2881   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2887 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2888 {
2889   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2890   PetscErrorCode ierr;
2891   PetscInt       i,*idxb = 0;
2892   PetscScalar    *va,*vb;
2893   Vec            vtmp;
2894 
2895   PetscFunctionBegin;
2896   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2897   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2898   if (idx) {
2899     for (i=0; i<A->rmap->n; i++) { /* loop over local rows; idx[] and va[] have rmap->n entries */
2900       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2901     }
2902   }
2903 
2904   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2905   if (idx) {
2906     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2907   }
2908   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2909   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2910 
2911   for (i=0; i<A->rmap->n; i++) {
2912     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2913       va[i] = vb[i];
2914       if (idx) idx[i] = a->garray[idxb[i]];
2915     }
2916   }
2917 
2918   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2919   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2920   ierr = PetscFree(idxb);CHKERRQ(ierr);
2921   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 #undef __FUNCT__
2926 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2927 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2928 {
2929   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2930   PetscInt       n      = A->rmap->n;
2931   PetscInt       cstart = A->cmap->rstart;
2932   PetscInt       *cmap  = mat->garray;
2933   PetscInt       *diagIdx, *offdiagIdx;
2934   Vec            diagV, offdiagV;
2935   PetscScalar    *a, *diagA, *offdiagA;
2936   PetscInt       r;
2937   PetscErrorCode ierr;
2938 
2939   PetscFunctionBegin;
2940   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2941   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2942   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2943   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2944   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2945   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2946   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2947   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2948   for (r = 0; r < n; ++r) {
2949     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2950       a[r]   = diagA[r];
2951       idx[r] = cstart + diagIdx[r];
2952     } else {
2953       a[r]   = offdiagA[r];
2954       idx[r] = cmap[offdiagIdx[r]];
2955     }
2956   }
2957   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2958   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2959   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2960   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2961   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2962   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2968 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2969 {
2970   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2971   PetscInt       n      = A->rmap->n;
2972   PetscInt       cstart = A->cmap->rstart;
2973   PetscInt       *cmap  = mat->garray;
2974   PetscInt       *diagIdx, *offdiagIdx;
2975   Vec            diagV, offdiagV;
2976   PetscScalar    *a, *diagA, *offdiagA;
2977   PetscInt       r;
2978   PetscErrorCode ierr;
2979 
2980   PetscFunctionBegin;
2981   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2982   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2983   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2984   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2985   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2986   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2987   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2988   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2989   for (r = 0; r < n; ++r) {
2990     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2991       a[r]   = diagA[r];
2992       idx[r] = cstart + diagIdx[r];
2993     } else {
2994       a[r]   = offdiagA[r];
2995       idx[r] = cmap[offdiagIdx[r]];
2996     }
2997   }
2998   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2999   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3000   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3001   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3002   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3003   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 #undef __FUNCT__
3008 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3009 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3010 {
3011   PetscErrorCode ierr;
3012   Mat            *dummy;
3013 
3014   PetscFunctionBegin;
3015   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3016   *newmat = *dummy;
3017   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3023 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3024 {
3025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3026   PetscErrorCode ierr;
3027 
3028   PetscFunctionBegin;
3029   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3035 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3036 {
3037   PetscErrorCode ierr;
3038   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3039 
3040   PetscFunctionBegin;
3041   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3042   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3043   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3044   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 /* -------------------------------------------------------------------*/
3049 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3050                                        MatGetRow_MPIAIJ,
3051                                        MatRestoreRow_MPIAIJ,
3052                                        MatMult_MPIAIJ,
3053                                 /* 4*/ MatMultAdd_MPIAIJ,
3054                                        MatMultTranspose_MPIAIJ,
3055                                        MatMultTransposeAdd_MPIAIJ,
3056 #if defined(PETSC_HAVE_PBGL)
3057                                        MatSolve_MPIAIJ,
3058 #else
3059                                        0,
3060 #endif
3061                                        0,
3062                                        0,
3063                                 /*10*/ 0,
3064                                        0,
3065                                        0,
3066                                        MatSOR_MPIAIJ,
3067                                        MatTranspose_MPIAIJ,
3068                                 /*15*/ MatGetInfo_MPIAIJ,
3069                                        MatEqual_MPIAIJ,
3070                                        MatGetDiagonal_MPIAIJ,
3071                                        MatDiagonalScale_MPIAIJ,
3072                                        MatNorm_MPIAIJ,
3073                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3074                                        MatAssemblyEnd_MPIAIJ,
3075                                        MatSetOption_MPIAIJ,
3076                                        MatZeroEntries_MPIAIJ,
3077                                 /*24*/ MatZeroRows_MPIAIJ,
3078                                        0,
3079 #if defined(PETSC_HAVE_PBGL)
3080                                        0,
3081 #else
3082                                        0,
3083 #endif
3084                                        0,
3085                                        0,
3086                                 /*29*/ MatSetUp_MPIAIJ,
3087 #if defined(PETSC_HAVE_PBGL)
3088                                        0,
3089 #else
3090                                        0,
3091 #endif
3092                                        0,
3093                                        0,
3094                                        0,
3095                                 /*34*/ MatDuplicate_MPIAIJ,
3096                                        0,
3097                                        0,
3098                                        0,
3099                                        0,
3100                                 /*39*/ MatAXPY_MPIAIJ,
3101                                        MatGetSubMatrices_MPIAIJ,
3102                                        MatIncreaseOverlap_MPIAIJ,
3103                                        MatGetValues_MPIAIJ,
3104                                        MatCopy_MPIAIJ,
3105                                 /*44*/ MatGetRowMax_MPIAIJ,
3106                                        MatScale_MPIAIJ,
3107                                        0,
3108                                        0,
3109                                        MatZeroRowsColumns_MPIAIJ,
3110                                 /*49*/ MatSetRandom_MPIAIJ,
3111                                        0,
3112                                        0,
3113                                        0,
3114                                        0,
3115                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3116                                        0,
3117                                        MatSetUnfactored_MPIAIJ,
3118                                        MatPermute_MPIAIJ,
3119                                        0,
3120                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3121                                        MatDestroy_MPIAIJ,
3122                                        MatView_MPIAIJ,
3123                                        0,
3124                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3125                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3126                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3127                                        0,
3128                                        0,
3129                                        0,
3130                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3131                                        MatGetRowMinAbs_MPIAIJ,
3132                                        0,
3133                                        MatSetColoring_MPIAIJ,
3134                                        0,
3135                                        MatSetValuesAdifor_MPIAIJ,
3136                                 /*75*/ MatFDColoringApply_AIJ,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                        MatFindZeroDiagonals_MPIAIJ,
3141                                 /*80*/ 0,
3142                                        0,
3143                                        0,
3144                                 /*83*/ MatLoad_MPIAIJ,
3145                                        0,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3151                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3152                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3153                                        MatPtAP_MPIAIJ_MPIAIJ,
3154                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3155                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                 /*99*/ 0,
3161                                        0,
3162                                        0,
3163                                        MatConjugate_MPIAIJ,
3164                                        0,
3165                                 /*104*/MatSetValuesRow_MPIAIJ,
3166                                        MatRealPart_MPIAIJ,
3167                                        MatImaginaryPart_MPIAIJ,
3168                                        0,
3169                                        0,
3170                                 /*109*/0,
3171                                        MatGetRedundantMatrix_MPIAIJ,
3172                                        MatGetRowMin_MPIAIJ,
3173                                        0,
3174                                        0,
3175                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                 /*119*/0,
3181                                        0,
3182                                        0,
3183                                        0,
3184                                        MatGetMultiProcBlock_MPIAIJ,
3185                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3186                                        MatGetColumnNorms_MPIAIJ,
3187                                        MatInvertBlockDiagonal_MPIAIJ,
3188                                        0,
3189                                        MatGetSubMatricesParallel_MPIAIJ,
3190                                 /*129*/0,
3191                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3192                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3193                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3194                                        0,
3195                                 /*134*/0,
3196                                        0,
3197                                        0,
3198                                        0,
3199                                        0,
3200                                 /*139*/0,
3201                                        0,
3202                                        0,
3203                                        MatFDColoringSetUp_MPIXAIJ
3204 };
3205 
3206 /* ----------------------------------------------------------------------------------------*/
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3210 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3211 {
3212   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3213   PetscErrorCode ierr;
3214 
3215   PetscFunctionBegin;
3216   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3217   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 #undef __FUNCT__
3222 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3223 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3224 {
3225   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3226   PetscErrorCode ierr;
3227 
3228   PetscFunctionBegin;
3229   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3230   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3231   PetscFunctionReturn(0);
3232 }
3233 
3234 #undef __FUNCT__
3235 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3236 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3237 {
3238   Mat_MPIAIJ     *b;
3239   PetscErrorCode ierr;
3240 
3241   PetscFunctionBegin;
3242   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3244   b = (Mat_MPIAIJ*)B->data;
3245 
3246   if (!B->preallocated) {
3247     /* Explicitly create 2 MATSEQAIJ matrices. */
3248     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3249     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3250     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3251     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3252     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3253     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3254     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3255     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3256     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3257     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3258   }
3259 
3260   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3261   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3262   B->preallocated = PETSC_TRUE;
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 #undef __FUNCT__
3267 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3268 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3269 {
3270   Mat            mat;
3271   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3272   PetscErrorCode ierr;
3273 
3274   PetscFunctionBegin;
3275   *newmat = 0;
3276   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3277   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3278   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3279   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3280   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3281   a       = (Mat_MPIAIJ*)mat->data;
3282 
3283   mat->factortype   = matin->factortype;
3284   mat->assembled    = PETSC_TRUE;
3285   mat->insertmode   = NOT_SET_VALUES;
3286   mat->preallocated = PETSC_TRUE;
3287 
3288   a->size         = oldmat->size;
3289   a->rank         = oldmat->rank;
3290   a->donotstash   = oldmat->donotstash;
3291   a->roworiented  = oldmat->roworiented;
3292   a->rowindices   = 0;
3293   a->rowvalues    = 0;
3294   a->getrowactive = PETSC_FALSE;
3295 
3296   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3297   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3298 
3299   if (oldmat->colmap) {
3300 #if defined(PETSC_USE_CTABLE)
3301     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3302 #else
3303     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3304     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3305     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3306 #endif
3307   } else a->colmap = 0;
3308   if (oldmat->garray) {
3309     PetscInt len;
3310     len  = oldmat->B->cmap->n;
3311     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3312     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3313     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3314   } else a->garray = 0;
3315 
3316   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3317   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3318   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3319   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3320   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3321   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3322   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3323   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3324   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3325   *newmat = mat;
3326   PetscFunctionReturn(0);
3327 }
3328 
3329 
3330 
3331 #undef __FUNCT__
3332 #define __FUNCT__ "MatLoad_MPIAIJ"
3333 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3334 {
3335   PetscScalar    *vals,*svals;
3336   MPI_Comm       comm;
3337   PetscErrorCode ierr;
3338   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3339   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3340   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3341   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3342   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3343   int            fd;
3344   PetscInt       bs = 1;
3345 
3346   PetscFunctionBegin;
3347   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3348   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3349   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3350   if (!rank) {
3351     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3352     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3353     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3354   }
3355 
3356   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3357   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3358   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3359 
3360   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3361 
3362   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3363   M    = header[1]; N = header[2];
3364   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3365   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3366   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3367 
3368   /* If global sizes are set, check if they are consistent with that given in the file */
3369   if (sizesset) {
3370     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3371   }
3372   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3373   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3374 
3375   /* determine ownership of all (block) rows */
3376   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3377   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3378   else m = newMat->rmap->n; /* Set by user */
3379 
3380   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3381   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3382 
3383   /* First process needs enough room for process with most rows */
3384   if (!rank) {
3385     mmax = rowners[1];
3386     for (i=2; i<=size; i++) {
3387       mmax = PetscMax(mmax, rowners[i]);
3388     }
3389   } else mmax = -1;             /* unused, but compilers complain */
3390 
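  /* turn the gathered per-process row counts into cumulative row-ownership offsets */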
3391   rowners[0] = 0;
3392   for (i=2; i<=size; i++) {
3393     rowners[i] += rowners[i-1];
3394   }
3395   rstart = rowners[rank];
3396   rend   = rowners[rank+1];
3397 
3398   /* distribute row lengths to all processors */
3399   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3400   if (!rank) {
3401     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3402     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3403     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3404     for (j=0; j<m; j++) {
3405       procsnz[0] += ourlens[j];
3406     }
3407     for (i=1; i<size; i++) {
3408       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3409       /* calculate the number of nonzeros on each processor */
3410       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3411         procsnz[i] += rowlengths[j];
3412       }
3413       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3414     }
3415     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3416   } else {
3417     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3418   }
3419 
3420   if (!rank) {
3421     /* determine max buffer needed and allocate it */
3422     maxnz = 0;
3423     for (i=0; i<size; i++) {
3424       maxnz = PetscMax(maxnz,procsnz[i]);
3425     }
3426     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3427 
3428     /* read in my part of the matrix column indices  */
3429     nz   = procsnz[0];
3430     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3431     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3432 
3433     /* read in everyone else's and ship off */
3434     for (i=1; i<size; i++) {
3435       nz   = procsnz[i];
3436       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3437       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3438     }
3439     ierr = PetscFree(cols);CHKERRQ(ierr);
3440   } else {
3441     /* determine buffer space needed for message */
3442     nz = 0;
3443     for (i=0; i<m; i++) {
3444       nz += ourlens[i];
3445     }
3446     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3447 
3448     /* receive message of column indices*/
3449     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3450   }
3451 
3452   /* determine column ownership if matrix is not square */
3453   if (N != M) {
3454     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3455     else n = newMat->cmap->n;
3456     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3457     cstart = cend - n;
3458   } else {
3459     cstart = rstart;
3460     cend   = rend;
3461     n      = cend - cstart;
3462   }
3463 
3464   /* loop over local rows, determining number of off diagonal entries */
3465   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3466   jj   = 0;
3467   for (i=0; i<m; i++) {
3468     for (j=0; j<ourlens[i]; j++) {
3469       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3470       jj++;
3471     }
3472   }
3473 
3474   for (i=0; i<m; i++) {
3475     ourlens[i] -= offlens[i];
3476   }
3477   if (!sizesset) {
3478     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3479   }
3480 
3481   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3482 
3483   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3484 
3485   for (i=0; i<m; i++) {
3486     ourlens[i] += offlens[i];
3487   }
3488 
3489   if (!rank) {
3490     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3491 
3492     /* read in my part of the matrix numerical values  */
3493     nz   = procsnz[0];
3494     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3495 
3496     /* insert into matrix */
3497     jj      = rstart;
3498     smycols = mycols;
3499     svals   = vals;
3500     for (i=0; i<m; i++) {
3501       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3502       smycols += ourlens[i];
3503       svals   += ourlens[i];
3504       jj++;
3505     }
3506 
3507     /* read in other processors and ship out */
3508     for (i=1; i<size; i++) {
3509       nz   = procsnz[i];
3510       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3511       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3512     }
3513     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3514   } else {
3515     /* receive numeric values */
3516     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3517 
3518     /* receive message of values*/
3519     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3520 
3521     /* insert into matrix */
3522     jj      = rstart;
3523     smycols = mycols;
3524     svals   = vals;
3525     for (i=0; i<m; i++) {
3526       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3527       smycols += ourlens[i];
3528       svals   += ourlens[i];
3529       jj++;
3530     }
3531   }
3532   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3533   ierr = PetscFree(vals);CHKERRQ(ierr);
3534   ierr = PetscFree(mycols);CHKERRQ(ierr);
3535   ierr = PetscFree(rowners);CHKERRQ(ierr);
3536   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3537   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3543 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3544 {
3545   PetscErrorCode ierr;
3546   IS             iscol_local;
3547   PetscInt       csize;
3548 
3549   PetscFunctionBegin;
3550   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3551   if (call == MAT_REUSE_MATRIX) {
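    /* retrieve the gathered column IS stashed on the matrix by a previous MAT_INITIAL_MATRIX call */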
3552     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3553     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3554   } else {
3555     PetscInt cbs;
3556     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3557     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3558     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3559   }
3560   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3561   if (call == MAT_INITIAL_MATRIX) {
3562     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3563     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3564   }
3565   PetscFunctionReturn(0);
3566 }
3567 
3568 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3569 #undef __FUNCT__
3570 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3571 /*
3572     Not great since it makes two copies of the submatrix: first a SeqAIJ
3573   locally, and then the end result by concatenating the local matrices.
3574   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3575 
3576   Note: This requires a sequential iscol with all indices.
3577 */
3578 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3579 {
3580   PetscErrorCode ierr;
3581   PetscMPIInt    rank,size;
3582   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3583   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3584   PetscBool      allcolumns, colflag;
3585   Mat            M,Mreuse;
3586   MatScalar      *vwork,*aa;
3587   MPI_Comm       comm;
3588   Mat_SeqAIJ     *aij;
3589 
3590   PetscFunctionBegin;
3591   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3592   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3594 
3595   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3596   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3597   if (colflag && ncol == mat->cmap->N) {
3598     allcolumns = PETSC_TRUE;
3599   } else {
3600     allcolumns = PETSC_FALSE;
3601   }
3602   if (call ==  MAT_REUSE_MATRIX) {
3603     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3604     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3605     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3606   } else {
3607     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3608   }
3609 
3610   /*
3611       m - number of local rows
3612       n - number of columns (same on all processors)
3613       rstart - first row in new global matrix generated
3614   */
3615   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3616   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3617   if (call == MAT_INITIAL_MATRIX) {
3618     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3619     ii  = aij->i;
3620     jj  = aij->j;
3621 
3622     /*
3623         Determine the number of non-zeros in the diagonal and off-diagonal
3624         portions of the matrix in order to do correct preallocation
3625     */
3626 
3627     /* first get start and end of "diagonal" columns */
3628     if (csize == PETSC_DECIDE) {
3629       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3630       if (mglobal == n) { /* square matrix */
3631         nlocal = m;
3632       } else {
3633         nlocal = n/size + ((n % size) > rank);
3634       }
3635     } else {
3636       nlocal = csize;
3637     }
3638     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3639     rstart = rend - nlocal;
3640     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3641 
3642     /* next, compute all the lengths */
3643     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3644     olens = dlens + m;
3645     for (i=0; i<m; i++) {
3646       jend = ii[i+1] - ii[i];
3647       olen = 0;
3648       dlen = 0;
3649       for (j=0; j<jend; j++) {
3650         if (*jj < rstart || *jj >= rend) olen++;
3651         else dlen++;
3652         jj++;
3653       }
3654       olens[i] = olen;
3655       dlens[i] = dlen;
3656     }
3657     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3658     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3659     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3660     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3661     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3662     ierr = PetscFree(dlens);CHKERRQ(ierr);
3663   } else {
3664     PetscInt ml,nl;
3665 
3666     M    = *newmat;
3667     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3668     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3669     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3670     /*
3671          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3672        rather than the slower MatSetValues().
3673     */
3674     M->was_assembled = PETSC_TRUE;
3675     M->assembled     = PETSC_FALSE;
3676   }
3677   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3678   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3679   ii   = aij->i;
3680   jj   = aij->j;
3681   aa   = aij->a;
3682   for (i=0; i<m; i++) {
3683     row   = rstart + i;
3684     nz    = ii[i+1] - ii[i];
3685     cwork = jj;     jj += nz;
3686     vwork = aa;     aa += nz;
3687     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3688   }
3689 
3690   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   *newmat = M;
3693 
3694   /* save submatrix used in processor for next request */
3695   if (call ==  MAT_INITIAL_MATRIX) {
3696     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3697     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3698   }
3699   PetscFunctionReturn(0);
3700 }
3701 
3702 #undef __FUNCT__
3703 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3704 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3705 {
3706   PetscInt       m,cstart, cend,j,nnz,i,d;
3707   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3708   const PetscInt *JJ;
3709   PetscScalar    *values;
3710   PetscErrorCode ierr;
3711 
3712   PetscFunctionBegin;
3713   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3714 
3715   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3716   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3717   m      = B->rmap->n;
3718   cstart = B->cmap->rstart;
3719   cend   = B->cmap->rend;
3720   rstart = B->rmap->rstart;
3721 
3722   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3723 
3724 #if defined(PETSC_USE_DEBUG)
3725   for (i=0; i<m; i++) {
3726     nnz = Ii[i+1]- Ii[i];
3727     JJ  = J + Ii[i];
3728     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns (%D)",i,nnz);
3729     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3730     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3731   }
3732 #endif
3733 
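  /* for each local row, count the entries that fall in the diagonal block
     [cstart,cend) versus the off-diagonal block, so preallocation is exact */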
3734   for (i=0; i<m; i++) {
3735     nnz     = Ii[i+1]- Ii[i];
3736     JJ      = J + Ii[i];
3737     nnz_max = PetscMax(nnz_max,nnz);
3738     d       = 0;
3739     for (j=0; j<nnz; j++) {
3740       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3741     }
3742     d_nnz[i] = d;
3743     o_nnz[i] = nnz - d;
3744   }
3745   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3746   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3747 
3748   if (v) values = (PetscScalar*)v;
3749   else {
3750     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3751   }
3752 
3753   for (i=0; i<m; i++) {
3754     ii   = i + rstart;
3755     nnz  = Ii[i+1]- Ii[i];
3756     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3757   }
3758   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3759   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760 
3761   if (!v) {
3762     ierr = PetscFree(values);CHKERRQ(ierr);
3763   }
3764   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3770 /*@
3771    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3772    (the default parallel PETSc format).
3773 
3774    Collective on MPI_Comm
3775 
3776    Input Parameters:
3777 +  B - the matrix
3778 .  i - the indices into j for the start of each local row (starts with zero)
3779 .  j - the column indices for each local row (starts with zero)
3780 -  v - optional values in the matrix
3781 
3782    Level: developer
3783 
3784    Notes:
3785        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3786      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3787      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3788 
3789        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3790 
3791        The format used for the sparse matrix input is equivalent to a
3792     row-major ordering, i.e., for the following matrix, the expected input data is
3793     as shown:
3794 
3795         1 0 0
3796         2 0 3     P0
3797        -------
3798         4 5 6     P1
3799 
3800      Process0 [P0]: rows_owned=[0,1]
3801         i =  {0,1,3}  [size = nrow+1  = 2+1]
3802         j =  {0,0,2}  [size = nz = 3]
3803         v =  {1,2,3}  [size = nz = 3]
3804 
3805      Process1 [P1]: rows_owned=[2]
3806         i =  {0,3}    [size = nrow+1  = 1+1]
3807         j =  {0,1,2}  [size = nz = 3]
3808         v =  {4,5,6}  [size = nz = 3]
3809 
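     A minimal call sequence sketch (comm, nlocal, and the arrays i, j, v are
     placeholders standing in for the example data above):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve

     Since this routine both inserts the values and assembles the matrix, no
     separate MatAssemblyBegin()/MatAssemblyEnd() calls are needed afterwards.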
3810 .keywords: matrix, aij, compressed row, sparse, parallel
3811 
3812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3813           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3814 @*/
3815 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3816 {
3817   PetscErrorCode ierr;
3818 
3819   PetscFunctionBegin;
3820   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 #undef __FUNCT__
3825 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3826 /*@C
3827    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3828    (the default parallel PETSc format).  For good matrix assembly performance
3829    the user should preallocate the matrix storage by setting the parameters
3830    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3831    performance can be increased by more than a factor of 50.
3832 
3833    Collective on MPI_Comm
3834 
3835    Input Parameters:
3836 +  A - the matrix
3837 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3838            (same value is used for all local rows)
3839 .  d_nnz - array containing the number of nonzeros in the various rows of the
3840            DIAGONAL portion of the local submatrix (possibly different for each row)
3841            or NULL, if d_nz is used to specify the nonzero structure.
3842            The size of this array is equal to the number of local rows, i.e 'm'.
3843            For matrices that will be factored, you must leave room for (and set)
3844            the diagonal entry even if it is zero.
3845 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3846            submatrix (same value is used for all local rows).
3847 -  o_nnz - array containing the number of nonzeros in the various rows of the
3848            OFF-DIAGONAL portion of the local submatrix (possibly different for
3849            each row) or NULL, if o_nz is used to specify the nonzero
3850            structure. The size of this array is equal to the number
3851            of local rows, i.e 'm'.
3852 
3853    If the *_nnz parameter is given then the *_nz parameter is ignored
3854 
3855    The AIJ format (also called the Yale sparse matrix format or
3856    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3857    storage.  The stored row and column indices begin with zero.
3858    See Users-Manual: ch_mat for details.
3859 
3860    The parallel matrix is partitioned such that the first m0 rows belong to
3861    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3862    to process 2 etc., where m0,m1,m2,... are given by the input parameter 'm'.
3863 
3864    The DIAGONAL portion of the local submatrix of a processor can be defined
3865    as the submatrix obtained by extracting the part corresponding to
3866    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3867    first row that belongs to the processor, r2 is the last row belonging to
3868    this processor, and c1-c2 is the range of indices of the local part of a
3869    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3870    common case of a square matrix, the row and column ranges are the same and
3871    the DIAGONAL part is also square. The remaining portion of the local
3872    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3873 
3874    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3875 
3876    You can call MatGetInfo() to get information on how effective the preallocation was;
3877    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3878    You can also run with the option -info and look for messages with the string
3879    malloc in them to see if additional memory allocation was needed.
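
   A minimal sketch of such a check (MAT_LOCAL reports the values for this process):

.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"allocated %g used %g unneeded %g mallocs %g\n",
                        info.nz_allocated,info.nz_used,info.nz_unneeded,info.mallocs);CHKERRQ(ierr);
.ve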
3880 
3881    Example usage:
3882 
3883    Consider the following 8x8 matrix with 34 non-zero values, that is
3884    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3885    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3886    as follows:
3887 
3888 .vb
3889             1  2  0  |  0  3  0  |  0  4
3890     Proc0   0  5  6  |  7  0  0  |  8  0
3891             9  0 10  | 11  0  0  | 12  0
3892     -------------------------------------
3893            13  0 14  | 15 16 17  |  0  0
3894     Proc1   0 18  0  | 19 20 21  |  0  0
3895             0  0  0  | 22 23  0  | 24  0
3896     -------------------------------------
3897     Proc2  25 26 27  |  0  0 28  | 29  0
3898            30  0  0  | 31 32 33  |  0 34
3899 .ve
3900 
3901    This can be represented as a collection of submatrices as:
3902 
3903 .vb
3904       A B C
3905       D E F
3906       G H I
3907 .ve
3908 
3909    Where the submatrices A,B,C are owned by proc0, D,E,F are
3910    owned by proc1, G,H,I are owned by proc2.
3911 
3912    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3913    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'M','N' parameters are 8,8, and have the same values on all procs.
3915 
3916    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3917    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3918    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3919    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3920    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3921    matrix, and [DF] as another SeqAIJ matrix.
3922 
3923    When d_nz, o_nz parameters are specified, d_nz storage elements are
3924    allocated for every row of the local diagonal submatrix, and o_nz
3925    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3926    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3927    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3928    In this case, the values of d_nz,o_nz are:
3929 .vb
3930      proc0 : d_nz = 2, o_nz = 2
3931      proc1 : d_nz = 3, o_nz = 2
3932      proc2 : d_nz = 1, o_nz = 4
3933 .ve
3934    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3935    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3936    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3937    34 values.
3938 
3939    When d_nnz, o_nnz parameters are specified, the storage is specified
3940    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3941    In the above case the values for d_nnz,o_nnz are:
3942 .vb
3943      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3944      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3945      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3946 .ve
3947    Here the space allocated is the sum of all the above values, i.e. 34, and
3948    hence the preallocation is perfect.
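
   A sketch of the corresponding call sequence on each process (assuming m, n, M, N
   and the d_nnz/o_nnz arrays above have been set up):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve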
3949 
3950    Level: intermediate
3951 
3952 .keywords: matrix, aij, compressed row, sparse, parallel
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3955           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3956 @*/
3957 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3963   PetscValidType(B,1);
3964   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 #undef __FUNCT__
3969 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3970 /*@
3971      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3972          CSR format the local rows.
3973 
3974    Collective on MPI_Comm
3975 
3976    Input Parameters:
3977 +  comm - MPI communicator
3978 .  m - number of local rows (Cannot be PETSC_DECIDE)
3979 .  n - This value should be the same as the local size used in creating the
3980        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3981        calculated if N is given). For square matrices n is almost always m.
3982 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3983 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3984 .   i - row indices
3985 .   j - column indices
3986 -   a - matrix values
3987 
3988    Output Parameter:
3989 .   mat - the matrix
3990 
3991    Level: intermediate
3992 
3993    Notes:
3994        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3995      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3996      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3997 
3998        The i and j indices are 0-based, and the i array holds the row offsets into the local j (and a) arrays.
3999 
4000        The format used for the sparse matrix input is equivalent to a
4001     row-major ordering, i.e. for the following matrix, the input data expected is
4002     as shown:
4003 
4004         1 0 0
4005         2 0 3     P0
4006        -------
4007         4 5 6     P1
4008 
4009      Process0 [P0]: rows_owned=[0,1]
4010         i =  {0,1,3}  [size = nrow+1  = 2+1]
4011         j =  {0,0,2}  [size = nz = 3]
4012         v =  {1,2,3}  [size = nz = 3]
4013 
4014      Process1 [P1]: rows_owned=[2]
4015         i =  {0,3}    [size = nrow+1  = 1+1]
4016         j =  {0,1,2}  [size = nz = 3]
4017         v =  {4,5,6}  [size = nz = 3]
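
     A sketch of the corresponding calls (assuming the communicator contains exactly
   the two processes shown and rank was obtained with MPI_Comm_rank()):

.vb
     if (!rank) { /* P0 owns rows 0 and 1 */
       PetscInt    i[] = {0,1,3},j[] = {0,0,2};
       PetscScalar v[] = {1,2,3};
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     } else {     /* P1 owns row 2 */
       PetscInt    i[] = {0,3},j[] = {0,1,2};
       PetscScalar v[] = {4,5,6};
       ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     }
.ve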
4018 
4019 .keywords: matrix, aij, compressed row, sparse, parallel
4020 
4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4022           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4023 @*/
4024 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4025 {
4026   PetscErrorCode ierr;
4027 
4028   PetscFunctionBegin;
4029   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4030   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4031   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4032   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4033   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4034   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4035   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4036   PetscFunctionReturn(0);
4037 }
4038 
4039 #undef __FUNCT__
4040 #define __FUNCT__ "MatCreateAIJ"
4041 /*@C
4042    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4043    (the default parallel PETSc format).  For good matrix assembly performance
4044    the user should preallocate the matrix storage by setting the parameters
4045    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4046    performance can be increased by more than a factor of 50.
4047 
4048    Collective on MPI_Comm
4049 
4050    Input Parameters:
4051 +  comm - MPI communicator
4052 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4053            This value should be the same as the local size used in creating the
4054            y vector for the matrix-vector product y = Ax.
4055 .  n - This value should be the same as the local size used in creating the
4056        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4057        calculated if N is given). For square matrices n is almost always m.
4058 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4059 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4060 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4061            (same value is used for all local rows)
4062 .  d_nnz - array containing the number of nonzeros in the various rows of the
4063            DIAGONAL portion of the local submatrix (possibly different for each row)
4064            or NULL, if d_nz is used to specify the nonzero structure.
4065            The size of this array is equal to the number of local rows, i.e 'm'.
4066 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4067            submatrix (same value is used for all local rows).
4068 -  o_nnz - array containing the number of nonzeros in the various rows of the
4069            OFF-DIAGONAL portion of the local submatrix (possibly different for
4070            each row) or NULL, if o_nz is used to specify the nonzero
4071            structure. The size of this array is equal to the number
4072            of local rows, i.e 'm'.
4073 
4074    Output Parameter:
4075 .  A - the matrix
4076 
4077    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4078    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4079    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4080 
4081    Notes:
4082    If the *_nnz parameter is given then the *_nz parameter is ignored
4083 
4084    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4085    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4086    storage requirements for this matrix.
4087 
4088    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4089    processor then it must be used on all processors that share the object for
4090    that argument.
4091 
4092    The user MUST specify either the local or global matrix dimensions
4093    (possibly both).
4094 
4095    The parallel matrix is partitioned across processors such that the
4096    first m0 rows belong to process 0, the next m1 rows belong to
4097    process 1, the next m2 rows belong to process 2 etc., where
4098    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4099    values corresponding to an [m x N] submatrix.
4100 
4101    The columns are logically partitioned with the n0 columns belonging
4102    to the 0th partition, the next n1 columns belonging to the next
4103    partition etc., where n0,n1,n2,... are given by the input parameter 'n'.
4104 
4105    The DIAGONAL portion of the local submatrix on any given processor
4106    is the submatrix corresponding to the rows and columns m,n owned by
4107    the given processor, i.e. the diagonal submatrix on
4108    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4109    etc. The remaining portion of the local submatrix [m x (N-n)]
4110    constitutes the OFF-DIAGONAL portion. The example below better
4111    illustrates this concept.
4112 
4113    For a square global matrix we define each processor's diagonal portion
4114    to be its local rows and the corresponding columns (a square submatrix);
4115    each processor's off-diagonal portion encompasses the remainder of the
4116    local matrix (a rectangular submatrix).
4117 
4118    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4119 
4120    When calling this routine with a single process communicator, a matrix of
4121    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4122    type of communicator, use the construction mechanism:
4123 .vb
       MatCreate(...,&A);
       MatSetType(A,MATMPIAIJ);
       MatSetSizes(A,m,n,M,N);
       MatMPIAIJSetPreallocation(A,...);
.ve
4124 
4125    By default, this format uses inodes (identical nodes) when possible.
4126    We search for consecutive rows with the same nonzero structure, thereby
4127    reusing matrix information to achieve increased efficiency.
4128 
4129    Options Database Keys:
4130 +  -mat_no_inode  - Do not use inodes
4131 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4132 -  -mat_aij_oneindex - Internally use indexing starting at 1
4133         rather than 0.  Note that when calling MatSetValues(),
4134         the user still MUST index entries starting at 0!
4135 
4136 
4137    Example usage:
4138 
4139    Consider the following 8x8 matrix with 34 non-zero values, that is
4140    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4141    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4142    as follows:
4143 
4144 .vb
4145             1  2  0  |  0  3  0  |  0  4
4146     Proc0   0  5  6  |  7  0  0  |  8  0
4147             9  0 10  | 11  0  0  | 12  0
4148     -------------------------------------
4149            13  0 14  | 15 16 17  |  0  0
4150     Proc1   0 18  0  | 19 20 21  |  0  0
4151             0  0  0  | 22 23  0  | 24  0
4152     -------------------------------------
4153     Proc2  25 26 27  |  0  0 28  | 29  0
4154            30  0  0  | 31 32 33  |  0 34
4155 .ve
4156 
4157    This can be represented as a collection of submatrices as:
4158 
4159 .vb
4160       A B C
4161       D E F
4162       G H I
4163 .ve
4164 
4165    Where the submatrices A,B,C are owned by proc0, D,E,F are
4166    owned by proc1, G,H,I are owned by proc2.
4167 
4168    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4169    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170    The 'M','N' parameters are 8,8, and have the same values on all procs.
4171 
4172    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4173    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4174    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4175    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4176    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4177    matrix, and [DF] as another SeqAIJ matrix.
4178 
4179    When d_nz, o_nz parameters are specified, d_nz storage elements are
4180    allocated for every row of the local diagonal submatrix, and o_nz
4181    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4182    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4183    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4184    In this case, the values of d_nz,o_nz are:
4185 .vb
4186      proc0 : d_nz = 2, o_nz = 2
4187      proc1 : d_nz = 3, o_nz = 2
4188      proc2 : d_nz = 1, o_nz = 4
4189 .ve
4190    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4191    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4192    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4193    34 values.
4194 
4195    When d_nnz, o_nnz parameters are specified, the storage is specified
4196    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4197    In the above case the values for d_nnz,o_nnz are:
4198 .vb
4199      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4200      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4201      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4202 .ve
4203    Here the space allocated is the sum of all the above values, i.e. 34, and
4204    hence the preallocation is perfect.
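
   With those arrays, the creation call on each process is simply (a sketch; m and n
   take the per-process values listed above):

.vb
     ierr = MatCreateAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve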
4205 
4206    Level: intermediate
4207 
4208 .keywords: matrix, aij, compressed row, sparse, parallel
4209 
4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4211           MPIAIJ, MatCreateMPIAIJWithArrays()
4212 @*/
4213 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4214 {
4215   PetscErrorCode ierr;
4216   PetscMPIInt    size;
4217 
4218   PetscFunctionBegin;
4219   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4220   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4221   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4222   if (size > 1) {
4223     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4224     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4225   } else {
4226     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4227     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4228   }
4229   PetscFunctionReturn(0);
4230 }
4231 
4232 #undef __FUNCT__
4233 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4234 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4235 {
4236   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4237 
4238   PetscFunctionBegin;
4239   *Ad     = a->A;
4240   *Ao     = a->B;
4241   *colmap = a->garray;
4242   PetscFunctionReturn(0);
4243 }
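
/*
    Usage sketch for MatMPIAIJGetSeqAIJ() (illustrative only): Ad is the local
    diagonal block, Ao the local off-diagonal block, and colmap[j] gives the
    global column index of local column j of Ao.

      Mat            Ad,Ao;
      const PetscInt *colmap;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/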
4244 
4245 #undef __FUNCT__
4246 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4247 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4248 {
4249   PetscErrorCode ierr;
4250   PetscInt       i;
4251   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4252 
4253   PetscFunctionBegin;
4254   if (coloring->ctype == IS_COLORING_GLOBAL) {
4255     ISColoringValue *allcolors,*colors;
4256     ISColoring      ocoloring;
4257 
4258     /* set coloring for diagonal portion */
4259     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4260 
4261     /* set coloring for off-diagonal portion */
4262     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4263     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4264     for (i=0; i<a->B->cmap->n; i++) {
4265       colors[i] = allcolors[a->garray[i]];
4266     }
4267     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4268     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4269     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4270     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4271   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4272     ISColoringValue *colors;
4273     PetscInt        *larray;
4274     ISColoring      ocoloring;
4275 
4276     /* set coloring for diagonal portion */
4277     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4278     for (i=0; i<a->A->cmap->n; i++) {
4279       larray[i] = i + A->cmap->rstart;
4280     }
4281     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4282     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4283     for (i=0; i<a->A->cmap->n; i++) {
4284       colors[i] = coloring->colors[larray[i]];
4285     }
4286     ierr = PetscFree(larray);CHKERRQ(ierr);
4287     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4288     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4289     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4290 
4291     /* set coloring for off-diagonal portion */
4292     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4293     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4294     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4295     for (i=0; i<a->B->cmap->n; i++) {
4296       colors[i] = coloring->colors[larray[i]];
4297     }
4298     ierr = PetscFree(larray);CHKERRQ(ierr);
4299     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4300     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4301     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4302   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 #undef __FUNCT__
4307 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4308 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4309 {
4310   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4311   PetscErrorCode ierr;
4312 
4313   PetscFunctionBegin;
4314   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4315   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 #undef __FUNCT__
4320 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4321 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4322 {
4323   PetscErrorCode ierr;
4324   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4325   PetscInt       *indx;
4326 
4327   PetscFunctionBegin;
4328   /* This routine will ONLY return MPIAIJ type matrix */
4329   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4330   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4331   if (n == PETSC_DECIDE) {
4332     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4333   }
4334   /* Check sum(n) = N */
4335   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4336   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4337 
4338   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4339   rstart -= m;
4340 
4341   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4342   for (i=0; i<m; i++) {
4343     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4344     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4345     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4346   }
4347 
4348   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4349   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4350   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4351   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4352   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4353   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4354   PetscFunctionReturn(0);
4355 }
4356 
4357 #undef __FUNCT__
4358 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4359 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4360 {
4361   PetscErrorCode ierr;
4362   PetscInt       m,N,i,rstart,nnz,Ii;
4363   PetscInt       *indx;
4364   PetscScalar    *values;
4365 
4366   PetscFunctionBegin;
4367   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4368   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4369   for (i=0; i<m; i++) {
4370     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4371     Ii   = i + rstart;
4372     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4373     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4374   }
4375   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4376   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4382 /*@
4383       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4384                  matrices from each processor
4385 
4386     Collective on MPI_Comm
4387 
4388    Input Parameters:
4389 +    comm - the communicator the parallel matrix will live on
4390 .    inmat - the input sequential matrix (one per process)
4391 .    n - number of local columns (or PETSC_DECIDE)
4392 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4393 
4394    Output Parameter:
4395 .    outmat - the parallel matrix generated
4396 
4397     Level: advanced
4398 
4399    Notes: The number of columns of the input matrix MUST be the same on EACH processor.
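
   A usage sketch (assuming each process holds a SeqAIJ matrix 'localmat' with the
   same number of columns; 'localmat' and 'globalmat' are illustrative names):

.vb
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
.ve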
4400 
4401 @*/
4402 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4403 {
4404   PetscErrorCode ierr;
4405   PetscMPIInt    size;
4406 
4407   PetscFunctionBegin;
4408   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4409   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4410   if (size == 1) {
4411     if (scall == MAT_INITIAL_MATRIX) {
4412       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4413     } else {
4414       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4415     }
4416   } else {
4417     if (scall == MAT_INITIAL_MATRIX) {
4418       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4419     }
4420     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4421   }
4422   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "MatFileSplit"
4428 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4429 {
4430   PetscErrorCode    ierr;
4431   PetscMPIInt       rank;
4432   PetscInt          m,N,i,rstart,nnz;
4433   size_t            len;
4434   const PetscInt    *indx;
4435   PetscViewer       out;
4436   char              *name;
4437   Mat               B;
4438   const PetscScalar *values;
4439 
4440   PetscFunctionBegin;
4441   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4442   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4443   /* Should this be the type of the diagonal block of A? */
4444   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4445   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4446   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4447   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4448   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4449   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4450   for (i=0; i<m; i++) {
4451     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4452     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4453     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4454   }
4455   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4456   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457 
4458   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4459   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4460   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4461   sprintf(name,"%s.%d",outfile,rank);
4462   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4463   ierr = PetscFree(name);CHKERRQ(ierr);
4464   ierr = MatView(B,out);CHKERRQ(ierr);
4465   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4466   ierr = MatDestroy(&B);CHKERRQ(ierr);
4467   PetscFunctionReturn(0);
4468 }
4469 
4470 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4473 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4474 {
4475   PetscErrorCode      ierr;
4476   Mat_Merge_SeqsToMPI *merge;
4477   PetscContainer      container;
4478 
4479   PetscFunctionBegin;
4480   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4481   if (container) {
4482     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4495     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4496     ierr = PetscFree(merge);CHKERRQ(ierr);
4497     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4498   }
4499   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4500   PetscFunctionReturn(0);
4501 }
4502 
4503 #include <../src/mat/utils/freespace.h>
4504 #include <petscbt.h>
4505 
4506 #undef __FUNCT__
4507 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4508 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4509 {
4510   PetscErrorCode      ierr;
4511   MPI_Comm            comm;
4512   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4513   PetscMPIInt         size,rank,taga,*len_s;
4514   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4515   PetscInt            proc,m;
4516   PetscInt            **buf_ri,**buf_rj;
4517   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4518   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4519   MPI_Request         *s_waits,*r_waits;
4520   MPI_Status          *status;
4521   MatScalar           *aa=a->a;
4522   MatScalar           **abuf_r,*ba_i;
4523   Mat_Merge_SeqsToMPI *merge;
4524   PetscContainer      container;
4525 
4526   PetscFunctionBegin;
4527   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4528   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4529 
4530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4531   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4532 
4533   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4534   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4535 
4536   bi     = merge->bi;
4537   bj     = merge->bj;
4538   buf_ri = merge->buf_ri;
4539   buf_rj = merge->buf_rj;
4540 
4541   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4542   owners = merge->rowmap->range;
4543   len_s  = merge->len_s;
4544 
4545   /* send and recv matrix values */
4546   /*-----------------------------*/
4547   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4548   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4549 
4550   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4551   for (proc=0,k=0; proc<size; proc++) {
4552     if (!len_s[proc]) continue;
4553     i    = owners[proc];
4554     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4555     k++;
4556   }
4557 
4558   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4559   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4560   ierr = PetscFree(status);CHKERRQ(ierr);
4561 
4562   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4563   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4564 
4565   /* insert mat values of mpimat */
4566   /*----------------------------*/
4567   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4568   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4569 
4570   for (k=0; k<merge->nrecv; k++) {
4571     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4572     nrows       = *(buf_ri_k[k]);
4573     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4574     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4575   }
4576 
4577   /* set values of ba */
4578   m = merge->rowmap->n;
4579   for (i=0; i<m; i++) {
4580     arow = owners[rank] + i;
4581     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4582     bnzi = bi[i+1] - bi[i];
4583     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4584 
4585     /* add local non-zero vals of this proc's seqmat into ba */
4586     anzi   = ai[arow+1] - ai[arow];
4587     aj     = a->j + ai[arow];
4588     aa     = a->a + ai[arow];
4589     nextaj = 0;
4590     for (j=0; nextaj<anzi; j++) {
4591       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4592         ba_i[j] += aa[nextaj++];
4593       }
4594     }
4595 
4596     /* add received vals into ba */
4597     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4598       /* i-th row */
4599       if (i == *nextrow[k]) {
4600         anzi   = *(nextai[k]+1) - *nextai[k];
4601         aj     = buf_rj[k] + *(nextai[k]);
4602         aa     = abuf_r[k] + *(nextai[k]);
4603         nextaj = 0;
4604         for (j=0; nextaj<anzi; j++) {
4605           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4606             ba_i[j] += aa[nextaj++];
4607           }
4608         }
4609         nextrow[k]++; nextai[k]++;
4610       }
4611     }
4612     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4613   }
4614   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4615   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4616 
4617   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4618   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4619   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4620   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4621   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4622   PetscFunctionReturn(0);
4623 }
4624 
4625 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4626 
4627 #undef __FUNCT__
4628 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4629 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4630 {
4631   PetscErrorCode      ierr;
4632   Mat                 B_mpi;
4633   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4634   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4635   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4636   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4637   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4638   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4639   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4640   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4641   MPI_Status          *status;
4642   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4643   PetscBT             lnkbt;
4644   Mat_Merge_SeqsToMPI *merge;
4645   PetscContainer      container;
4646 
4647   PetscFunctionBegin;
4648   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4649 
4650   /* make sure it is a PETSc comm */
4651   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4652   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4653   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4654 
4655   ierr = PetscNew(&merge);CHKERRQ(ierr);
4656   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4657 
4658   /* determine row ownership */
4659   /*---------------------------------------------------------*/
4660   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4661   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4662   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4665   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4666   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4667 
4668   m      = merge->rowmap->n;
4669   owners = merge->rowmap->range;
4670 
4671   /* determine the number of messages to send, their lengths */
4672   /*---------------------------------------------------------*/
4673   len_s = merge->len_s;
4674 
4675   len          = 0; /* length of buf_si[] */
4676   merge->nsend = 0;
4677   for (proc=0; proc<size; proc++) {
4678     len_si[proc] = 0;
4679     if (proc == rank) {
4680       len_s[proc] = 0;
4681     } else {
4682       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4683       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4684     }
4685     if (len_s[proc]) {
4686       merge->nsend++;
4687       nrows = 0;
4688       for (i=owners[proc]; i<owners[proc+1]; i++) {
4689         if (ai[i+1] > ai[i]) nrows++;
4690       }
4691       len_si[proc] = 2*(nrows+1);
4692       len         += len_si[proc];
4693     }
4694   }
4695 
4696   /* determine the number and length of messages to receive for ij-structure */
4697   /*-------------------------------------------------------------------------*/
4698   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4699   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4700 
4701   /* post the Irecv of j-structure */
4702   /*-------------------------------*/
4703   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4704   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4705 
4706   /* post the Isend of j-structure */
4707   /*--------------------------------*/
4708   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4709 
4710   for (proc=0, k=0; proc<size; proc++) {
4711     if (!len_s[proc]) continue;
4712     i    = owners[proc];
4713     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4714     k++;
4715   }
4716 
4717   /* receives and sends of j-structure are complete */
4718   /*------------------------------------------------*/
4719   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4720   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4721 
4722   /* send and recv i-structure */
4723   /*---------------------------*/
4724   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4725   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4726 
4727   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4728   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4729   for (proc=0,k=0; proc<size; proc++) {
4730     if (!len_s[proc]) continue;
4731     /* form outgoing message for i-structure:
4732          buf_si[0]:                 nrows to be sent
4733                [1:nrows]:           row index (global)
4734                [nrows+1:2*nrows+1]: i-structure index
4735     */
4736     /*-------------------------------------------*/
4737     nrows       = len_si[proc]/2 - 1;
4738     buf_si_i    = buf_si + nrows+1;
4739     buf_si[0]   = nrows;
4740     buf_si_i[0] = 0;
4741     nrows       = 0;
4742     for (i=owners[proc]; i<owners[proc+1]; i++) {
4743       anzi = ai[i+1] - ai[i];
4744       if (anzi) {
4745         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4746         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4747         nrows++;
4748       }
4749     }
4750     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4751     k++;
4752     buf_si += len_si[proc];
4753   }
4754 
4755   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4756   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4757 
4758   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4759   for (i=0; i<merge->nrecv; i++) {
4760     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4761   }
4762 
4763   ierr = PetscFree(len_si);CHKERRQ(ierr);
4764   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4765   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4766   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4767   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4768   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4769   ierr = PetscFree(status);CHKERRQ(ierr);
4770 
4771   /* compute a local seq matrix in each processor */
4772   /*----------------------------------------------*/
4773   /* allocate bi array and free space for accumulating nonzero column info */
4774   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4775   bi[0] = 0;
4776 
4777   /* create and initialize a linked list */
4778   nlnk = N+1;
4779   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4780 
4781   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4782   len  = ai[owners[rank+1]] - ai[owners[rank]];
4783   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4784 
4785   current_space = free_space;
4786 
4787   /* determine symbolic info for each local row */
4788   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4789 
4790   for (k=0; k<merge->nrecv; k++) {
4791     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4792     nrows       = *buf_ri_k[k];
4793     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4794     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4795   }
4796 
4797   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4798   len  = 0;
4799   for (i=0; i<m; i++) {
4800     bnzi = 0;
4801     /* add local non-zero cols of this proc's seqmat into lnk */
4802     arow  = owners[rank] + i;
4803     anzi  = ai[arow+1] - ai[arow];
4804     aj    = a->j + ai[arow];
4805     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4806     bnzi += nlnk;
4807     /* add received col data into lnk */
4808     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4809       if (i == *nextrow[k]) { /* i-th row */
4810         anzi  = *(nextai[k]+1) - *nextai[k];
4811         aj    = buf_rj[k] + *nextai[k];
4812         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813         bnzi += nlnk;
4814         nextrow[k]++; nextai[k]++;
4815       }
4816     }
4817     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4818 
4819     /* if free space is not available, make more free space */
4820     if (current_space->local_remaining<bnzi) {
4821       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4822       nspacedouble++;
4823     }
4824     /* copy data into free space, then initialize lnk */
4825     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4826     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4827 
4828     current_space->array           += bnzi;
4829     current_space->local_used      += bnzi;
4830     current_space->local_remaining -= bnzi;
4831 
4832     bi[i+1] = bi[i] + bnzi;
4833   }
4834 
4835   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4836 
4837   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4838   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4839   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4840 
4841   /* create symbolic parallel matrix B_mpi */
4842   /*---------------------------------------*/
4843   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4844   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4845   if (n==PETSC_DECIDE) {
4846     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4847   } else {
4848     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4849   }
4850   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4851   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4852   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4853   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4854   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4855 
4856   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4857   B_mpi->assembled    = PETSC_FALSE;
4858   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4859   merge->bi           = bi;
4860   merge->bj           = bj;
4861   merge->buf_ri       = buf_ri;
4862   merge->buf_rj       = buf_rj;
4863   merge->coi          = NULL;
4864   merge->coj          = NULL;
4865   merge->owners_co    = NULL;
4866 
4867   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4868 
4869   /* attach the supporting struct to B_mpi for reuse */
4870   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4871   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4872   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4873   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4874   *mpimat = B_mpi;
4875 
4876   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4877   PetscFunctionReturn(0);
4878 }
4879 
4880 #undef __FUNCT__
4881 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4882 /*@C
4883       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4884                  matrices from each processor
4885 
4886     Collective on MPI_Comm
4887 
4888    Input Parameters:
4889 +    comm - the communicator the parallel matrix will live on
4890 .    seqmat - the input sequential matrix (one per process)
4891 .    m - number of local rows (or PETSC_DECIDE)
4892 .    n - number of local columns (or PETSC_DECIDE)
4893 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4894 
4895    Output Parameter:
4896 .    mpimat - the parallel matrix generated
4897 
4898     Level: advanced
4899 
4900    Notes:
4901      The dimensions of the sequential matrix in each processor MUST be the same.
4902      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4903      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
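
     A usage sketch of the initial-then-reuse pattern (illustrative; the nonzero
     pattern of seqmat must not change between the two calls):

.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... update the numerical values of seqmat ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve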
4904 @*/
4905 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4906 {
4907   PetscErrorCode ierr;
4908   PetscMPIInt    size;
4909 
4910   PetscFunctionBegin;
4911   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4912   if (size == 1) {
4913     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4914     if (scall == MAT_INITIAL_MATRIX) {
4915       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4916     } else {
4917       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4918     }
4919     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920     PetscFunctionReturn(0);
4921   }
4922   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4923   if (scall == MAT_INITIAL_MATRIX) {
4924     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4925   }
4926   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4927   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4928   PetscFunctionReturn(0);
4929 }
4930 
4931 #undef __FUNCT__
4932 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4933 /*@
4934      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4935           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4936           with MatGetSize().
4937 
4938     Not Collective
4939 
4940    Input Parameters:
4941 +    A - the matrix
4942 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4943 
4944    Output Parameter:
4945 .    A_loc - the local sequential matrix generated
4946 
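   A usage sketch (illustrative):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... after the values of A change (same nonzero pattern) ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
.ve
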
4947     Level: developer
4948 
4949 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4950 
4951 @*/
4952 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4953 {
4954   PetscErrorCode ierr;
4955   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4956   Mat_SeqAIJ     *mat,*a,*b;
4957   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4958   MatScalar      *aa,*ba,*cam;
4959   PetscScalar    *ca;
4960   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4961   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4962   PetscBool      match;
4963 
4964   PetscFunctionBegin;
4965   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4966   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4967   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4968   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4969   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4970   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4971   aa = a->a; ba = b->a;
4972   if (scall == MAT_INITIAL_MATRIX) {
4973     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4974     ci[0] = 0;
4975     for (i=0; i<am; i++) {
4976       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4977     }
4978     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4979     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4980     k    = 0;
4981     for (i=0; i<am; i++) {
4982       ncols_o = bi[i+1] - bi[i];
4983       ncols_d = ai[i+1] - ai[i];
4984       /* off-diagonal portion of A */
4985       for (jo=0; jo<ncols_o; jo++) {
4986         col = cmap[*bj];
4987         if (col >= cstart) break;
4988         cj[k]   = col; bj++;
4989         ca[k++] = *ba++;
4990       }
4991       /* diagonal portion of A */
4992       for (j=0; j<ncols_d; j++) {
4993         cj[k]   = cstart + *aj++;
4994         ca[k++] = *aa++;
4995       }
4996       /* off-diagonal portion of A */
4997       for (j=jo; j<ncols_o; j++) {
4998         cj[k]   = cmap[*bj++];
4999         ca[k++] = *ba++;
5000       }
5001     }
5002     /* put together the new matrix */
5003     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5004     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5005     /* Since these are PETSc arrays, change flags to free them as necessary. */
5006     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5007     mat->free_a  = PETSC_TRUE;
5008     mat->free_ij = PETSC_TRUE;
5009     mat->nonew   = 0;
5010   } else if (scall == MAT_REUSE_MATRIX) {
5011     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5012     ci = mat->i; cj = mat->j; cam = mat->a;
5013     for (i=0; i<am; i++) {
5014       /* off-diagonal portion of A */
5015       ncols_o = bi[i+1] - bi[i];
5016       for (jo=0; jo<ncols_o; jo++) {
5017         col = cmap[*bj];
5018         if (col >= cstart) break;
5019         *cam++ = *ba++; bj++;
5020       }
5021       /* diagonal portion of A */
5022       ncols_d = ai[i+1] - ai[i];
5023       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5024       /* off-diagonal portion of A */
5025       for (j=jo; j<ncols_o; j++) {
5026         *cam++ = *ba++; bj++;
5027       }
5028     }
5029   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5030   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5031   PetscFunctionReturn(0);
5032 }
5033 
5034 #undef __FUNCT__
5035 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5036 /*@C
5037      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5038 
5039     Not Collective
5040 
5041    Input Parameters:
5042 +    A - the matrix
5043 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5044 -    row, col - index sets of rows and columns to extract (or NULL)
5045 
5046    Output Parameter:
5047 .    A_loc - the local sequential matrix generated
5048 
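   A usage sketch (illustrative; passing NULL for row and col takes all local rows
   and the nonzero columns):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve
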
5049     Level: developer
5050 
5051 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5052 
5053 @*/
5054 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5055 {
5056   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5057   PetscErrorCode ierr;
5058   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5059   IS             isrowa,iscola;
5060   Mat            *aloc;
5061   PetscBool      match;
5062 
5063   PetscFunctionBegin;
5064   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5065   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5066   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5067   if (!row) {
5068     start = A->rmap->rstart; end = A->rmap->rend;
5069     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5070   } else {
5071     isrowa = *row;
5072   }
5073   if (!col) {
5074     start = A->cmap->rstart;
5075     cmap  = a->garray;
5076     nzA   = a->A->cmap->n;
5077     nzB   = a->B->cmap->n;
5078     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5079     ncols = 0;
5080     for (i=0; i<nzB; i++) {
5081       if (cmap[i] < start) idx[ncols++] = cmap[i];
5082       else break;
5083     }
5084     imark = i;
5085     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5086     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5087     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5088   } else {
5089     iscola = *col;
5090   }
5091   if (scall != MAT_INITIAL_MATRIX) {
5092     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5093     aloc[0] = *A_loc;
5094   }
5095   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5096   *A_loc = aloc[0];
5097   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5098   if (!row) {
5099     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5100   }
5101   if (!col) {
5102     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5103   }
5104   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5105   PetscFunctionReturn(0);
5106 }
5107 
5108 #undef __FUNCT__
5109 #define __FUNCT__ "MatGetBrowsOfAcols"
5110 /*@C
5111     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5112 
5113     Collective on Mat
5114 
5115    Input Parameters:
5116 +    A,B - the matrices in mpiaij format
5117 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5118 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5119 
5120    Output Parameter:
5121 +    rowb, colb - index sets of rows and columns of B to extract
5122 -    B_seq - the sequential matrix generated
5123 
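   A usage sketch (illustrative; the index sets created on the first call are
   passed back in on reuse):

.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... after the values of B change (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
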
5124     Level: developer
5125 
5126 @*/
5127 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5128 {
5129   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5130   PetscErrorCode ierr;
5131   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5132   IS             isrowb,iscolb;
5133   Mat            *bseq=NULL;
5134 
5135   PetscFunctionBegin;
5136   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5137     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5138   }
5139   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5140 
5141   if (scall == MAT_INITIAL_MATRIX) {
5142     start = A->cmap->rstart;
5143     cmap  = a->garray;
5144     nzA   = a->A->cmap->n;
5145     nzB   = a->B->cmap->n;
5146     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5147     ncols = 0;
5148     for (i=0; i<nzB; i++) {  /* rows of B below the local row range */
5149       if (cmap[i] < start) idx[ncols++] = cmap[i];
5150       else break;
5151     }
5152     imark = i;
5153     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* rows of B in the local row range */
5154     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local row range */
5155     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5156     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5157   } else {
5158     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5159     isrowb  = *rowb; iscolb = *colb;
5160     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5161     bseq[0] = *B_seq;
5162   }
5163   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5164   *B_seq = bseq[0];
5165   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5166   if (!rowb) {
5167     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5168   } else {
5169     *rowb = isrowb;
5170   }
5171   if (!colb) {
5172     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5173   } else {
5174     *colb = iscolb;
5175   }
5176   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5177   PetscFunctionReturn(0);
5178 }
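
/*
   A usage sketch (illustrative, assuming A and B are compatible MPIAIJ
   matrices): the index sets created on the first call must be kept, passed
   back for MAT_REUSE_MATRIX, and eventually destroyed by the caller.

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
       ... values of B change, sparsity pattern unchanged ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/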
5179 
5180 #undef __FUNCT__
5181 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5182 /*
5183     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5184     of the OFF-DIAGONAL portion of local A
5185 
5186     Collective on Mat
5187 
5188    Input Parameters:
5189 +    A,B - the matrices in mpiaij format
5190 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5191 
5192    Output Parameters:
5193 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5194 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5195 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5196 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5197 
5198     Level: developer
5199 
5200 */
5201 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5202 {
5203   VecScatter_MPI_General *gen_to,*gen_from;
5204   PetscErrorCode         ierr;
5205   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5206   Mat_SeqAIJ             *b_oth;
5207   VecScatter             ctx =a->Mvctx;
5208   MPI_Comm               comm;
5209   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5210   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5211   PetscScalar            *rvalues,*svalues;
5212   MatScalar              *b_otha,*bufa,*bufA;
5213   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5214   MPI_Request            *rwaits = NULL,*swaits = NULL;
5215   MPI_Status             *sstatus,rstatus;
5216   PetscMPIInt            jj;
5217   PetscInt               *cols,sbs,rbs;
5218   PetscScalar            *vals;
5219 
5220   PetscFunctionBegin;
5221   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5222   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5223     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5224   }
5225   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5226   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5227 
5228   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5229   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5230   rvalues  = gen_from->values; /* reused below to hold the lengths of the rows to be received */
5231   svalues  = gen_to->values;   /* reused below to hold the lengths of the rows to be sent */
5232   nrecvs   = gen_from->n;
5233   nsends   = gen_to->n;
5234 
5235   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5236   srow    = gen_to->indices;    /* local row index to be sent */
5237   sstarts = gen_to->starts;
5238   sprocs  = gen_to->procs;
5239   sstatus = gen_to->sstatus;
5240   sbs     = gen_to->bs;
5241   rstarts = gen_from->starts;
5242   rprocs  = gen_from->procs;
5243   rbs     = gen_from->bs;
5244 
5245   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5246   if (scall == MAT_INITIAL_MATRIX) {
5247     /* i-array */
5248     /*---------*/
5249     /*  post receives */
5250     for (i=0; i<nrecvs; i++) {
5251       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5252       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5253       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5254     }
5255 
5256     /* pack the outgoing message */
5257     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5258 
5259     sstartsj[0] = 0;
5260     rstartsj[0] = 0;
5261     len         = 0; /* total length of j or a array to be sent */
5262     k           = 0;
5263     for (i=0; i<nsends; i++) {
5264       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5265       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5266       for (j=0; j<nrows; j++) {
5267         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5268         for (l=0; l<sbs; l++) {
5269           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5270 
5271           rowlen[j*sbs+l] = ncols;
5272 
5273           len += ncols;
5274           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5275         }
5276         k++;
5277       }
5278       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5279 
5280       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5281     }
5282     /* recvs and sends of i-array are completed */
5283     i = nrecvs;
5284     while (i--) {
5285       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5286     }
5287     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5288 
5289     /* allocate buffers for sending j and a arrays */
5290     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5291     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5292 
5293     /* create i-array of B_oth */
5294     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5295 
5296     b_othi[0] = 0;
5297     len       = 0; /* total length of j or a array to be received */
5298     k         = 0;
5299     for (i=0; i<nrecvs; i++) {
5300       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5301       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5302       for (j=0; j<nrows; j++) {
5303         b_othi[k+1] = b_othi[k] + rowlen[j];
5304         len        += rowlen[j]; k++;
5305       }
5306       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5307     }
5308 
5309     /* allocate space for j and a arrays of B_oth */
5310     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5311     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5312 
5313     /* j-array */
5314     /*---------*/
5315     /*  post receives of j-array */
5316     for (i=0; i<nrecvs; i++) {
5317       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5318       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5319     }
5320 
5321     /* pack the outgoing message j-array */
5322     k = 0;
5323     for (i=0; i<nsends; i++) {
5324       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5325       bufJ  = bufj+sstartsj[i];
5326       for (j=0; j<nrows; j++) {
5327         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5328         for (ll=0; ll<sbs; ll++) {
5329           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5330           for (l=0; l<ncols; l++) {
5331             *bufJ++ = cols[l];
5332           }
5333           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5334         }
5335       }
5336       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5337     }
5338 
5339     /* recvs and sends of j-array are completed */
5340     i = nrecvs;
5341     while (i--) {
5342       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5343     }
5344     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5345   } else if (scall == MAT_REUSE_MATRIX) {
5346     sstartsj = *startsj_s;
5347     rstartsj = *startsj_r;
5348     bufa     = *bufa_ptr;
5349     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5350     b_otha   = b_oth->a;
5351   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5352 
5353   /* a-array */
5354   /*---------*/
5355   /*  post receives of a-array */
5356   for (i=0; i<nrecvs; i++) {
5357     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5358     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5359   }
5360 
5361   /* pack the outgoing message a-array */
5362   k = 0;
5363   for (i=0; i<nsends; i++) {
5364     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5365     bufA  = bufa+sstartsj[i];
5366     for (j=0; j<nrows; j++) {
5367       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5368       for (ll=0; ll<sbs; ll++) {
5369         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5370         for (l=0; l<ncols; l++) {
5371           *bufA++ = vals[l];
5372         }
5373         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5374       }
5375     }
5376     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5377   }
5378   /* recvs and sends of a-array are completed */
5379   i = nrecvs;
5380   while (i--) {
5381     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5382   }
5383   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5384   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5385 
5386   if (scall == MAT_INITIAL_MATRIX) {
5387     /* put together the new matrix */
5388     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5389 
5390     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5391     /* Since these are PETSc arrays, change flags to free them as necessary. */
5392     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5393     b_oth->free_a  = PETSC_TRUE;
5394     b_oth->free_ij = PETSC_TRUE;
5395     b_oth->nonew   = 0;
5396 
5397     ierr = PetscFree(bufj);CHKERRQ(ierr);
5398     if (!startsj_s || !bufa_ptr) {
5399       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5400       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself; bufa_ptr is the caller's pointer and must not be freed */
5401     } else {
5402       *startsj_s = sstartsj;
5403       *startsj_r = rstartsj;
5404       *bufa_ptr  = bufa;
5405     }
5406   }
5407   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5408   PetscFunctionReturn(0);
5409 }
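
/*
   The routine above repeats one communication idiom three times (for the i-,
   j-, and a-arrays): post all receives, pack and post all sends, then drain
   the receives with MPI_Waitany and the sends with MPI_Waitall.  A
   stripped-down sketch of the idiom; recvbuf, sendbuf, rcount, and scount are
   placeholders, and rwaits/swaits have lengths nrecvs/nsends, built exactly
   as in the code above:

     for (i=0; i<nrecvs; i++) {
       ierr = MPI_Irecv(recvbuf[i],rcount[i],MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
     }
     for (i=0; i<nsends; i++) {
       ierr = MPI_Isend(sendbuf[i],scount[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
     }
     i = nrecvs;
     while (i--) {ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);}
     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
*/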
5410 
5411 #undef __FUNCT__
5412 #define __FUNCT__ "MatGetCommunicationStructs"
5413 /*@C
5414   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5415 
5416   Not Collective
5417 
5418   Input Parameter:
5419 . A - The matrix in mpiaij format
5420 
5421   Output Parameters:
5422 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5423 . colmap - A map from global column index to local index into lvec
5424 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5425 
5426   Level: developer
5427 
5428 @*/
5429 #if defined(PETSC_USE_CTABLE)
5430 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5431 #else
5432 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5433 #endif
5434 {
5435   Mat_MPIAIJ *a;
5436 
5437   PetscFunctionBegin;
5438   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5439   PetscValidPointer(lvec, 2);
5440   PetscValidPointer(colmap, 3);
5441   PetscValidPointer(multScatter, 4);
5442   a = (Mat_MPIAIJ*) A->data;
5443   if (lvec) *lvec = a->lvec;
5444   if (colmap) *colmap = a->colmap;
5445   if (multScatter) *multScatter = a->Mvctx;
5446   PetscFunctionReturn(0);
5447 }
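
/*
   A usage sketch: inspect the objects MatMult() uses to gather off-process
   vector entries.  The returned objects are owned by the matrix and must not
   be destroyed by the caller.

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
*/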
5448 
5449 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5450 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5452 
5453 #undef __FUNCT__
5454 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5455 /*
5456     Computes C = A*B as (B'*A')', since the MPIDense*MPIAIJ product cannot be formed directly
5457 
5458                n                       p                          p
5459         (              )       (              )         (                  )
5460       m (      A       )  *  n (       B      )   =   m (         C        )
5461         (              )       (              )         (                  )
5462 
5463 */
5464 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5465 {
5466   PetscErrorCode ierr;
5467   Mat            At,Bt,Ct;
5468 
5469   PetscFunctionBegin;
5470   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5471   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5472   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5473   ierr = MatDestroy(&At);CHKERRQ(ierr);
5474   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5475   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5476   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5477   PetscFunctionReturn(0);
5478 }
5479 
5480 #undef __FUNCT__
5481 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5482 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5483 {
5484   PetscErrorCode ierr;
5485   PetscInt       m=A->rmap->n,n=B->cmap->n;
5486   Mat            Cmat;
5487 
5488   PetscFunctionBegin;
5489   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5490   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5491   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5492   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5493   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5494   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5495   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497 
5498   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5499 
5500   *C = Cmat;
5501   PetscFunctionReturn(0);
5502 }
5503 
5504 /* ----------------------------------------------------------------*/
5505 #undef __FUNCT__
5506 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5507 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5508 {
5509   PetscErrorCode ierr;
5510 
5511   PetscFunctionBegin;
5512   if (scall == MAT_INITIAL_MATRIX) {
5513     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5514     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5515     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5516   }
5517   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5518   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5519   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5520   PetscFunctionReturn(0);
5521 }
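
/*
   A usage sketch: this kernel is reached through the generic MatMatMult()
   interface when A has type MATMPIDENSE and B has type MATMPIAIJ.

     Mat C = NULL;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ... new values in A or B, same sizes: recompute into the existing C ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/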
5522 
5523 #if defined(PETSC_HAVE_MUMPS)
5524 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5525 #endif
5526 #if defined(PETSC_HAVE_PASTIX)
5527 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5528 #endif
5529 #if defined(PETSC_HAVE_SUPERLU_DIST)
5530 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5531 #endif
5532 #if defined(PETSC_HAVE_CLIQUE)
5533 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5534 #endif
5535 
5536 /*MC
5537    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5538 
5539    Options Database Keys:
5540 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5541 
5542   Level: beginner
5543 
5544 .seealso: MatCreateAIJ()
5545 M*/
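
/*
   A creation sketch for this type (the sizes and preallocation counts are
   illustrative): either set the type explicitly, as below, or rely on
   -mat_type mpiaij together with MatSetFromOptions().

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/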
5546 
5547 #undef __FUNCT__
5548 #define __FUNCT__ "MatCreate_MPIAIJ"
5549 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5550 {
5551   Mat_MPIAIJ     *b;
5552   PetscErrorCode ierr;
5553   PetscMPIInt    size;
5554 
5555   PetscFunctionBegin;
5556   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5557 
5558   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5559   B->data       = (void*)b;
5560   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5561   B->assembled  = PETSC_FALSE;
5562   B->insertmode = NOT_SET_VALUES;
5563   b->size       = size;
5564 
5565   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5566 
5567   /* build cache for off array entries formed */
5568   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5569 
5570   b->donotstash  = PETSC_FALSE;
5571   b->colmap      = 0;
5572   b->garray      = 0;
5573   b->roworiented = PETSC_TRUE;
5574 
5575   /* stuff used for matrix vector multiply */
5576   b->lvec  = NULL;
5577   b->Mvctx = NULL;
5578 
5579   /* stuff for MatGetRow() */
5580   b->rowindices   = 0;
5581   b->rowvalues    = 0;
5582   b->getrowactive = PETSC_FALSE;
5583 
5584   /* flexible pointer used in CUSP/CUSPARSE classes */
5585   b->spptr = NULL;
5586 
5587 #if defined(PETSC_HAVE_MUMPS)
5588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5589 #endif
5590 #if defined(PETSC_HAVE_PASTIX)
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5592 #endif
5593 #if defined(PETSC_HAVE_SUPERLU_DIST)
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5595 #endif
5596 #if defined(PETSC_HAVE_CLIQUE)
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5598 #endif
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5613   PetscFunctionReturn(0);
5614 }
5615 
5616 #undef __FUNCT__
5617 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5618 /*@
5619      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5620          and "off-diagonal" part of the matrix in CSR format.
5621 
5622    Collective on MPI_Comm
5623 
5624    Input Parameters:
5625 +  comm - MPI communicator
5626 .  m - number of local rows (Cannot be PETSC_DECIDE)
5627 .  n - This value should be the same as the local size used in creating the
5628        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5629        calculated if N is given). For square matrices n is almost always m.
5630 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5631 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5632 .   i - row indices for "diagonal" portion of matrix
5633 .   j - column indices
5634 .   a - matrix values
5635 .   oi - row indices for "off-diagonal" portion of matrix
5636 .   oj - column indices
5637 -   oa - matrix values
5638 
5639    Output Parameter:
5640 .   mat - the matrix
5641 
5642    Level: advanced
5643 
5644    Notes:
5645        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5646        must free the arrays once the matrix has been destroyed and not before.
5647 
5648        The i, j, oi, and oj indices are 0 based
5649 
5650        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5651 
5652        This sets local rows and cannot be used to set off-processor values.
5653 
5654        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5655        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5656        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5657        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5658        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5659        communication if it is known that only local entries will be set.
5660 
5661 .keywords: matrix, aij, compressed row, sparse, parallel
5662 
5663 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5664           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5665 @*/
5666 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5667 {
5668   PetscErrorCode ierr;
5669   Mat_MPIAIJ     *maij;
5670 
5671   PetscFunctionBegin;
5672   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5673   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5674   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5675   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5676   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5677   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5678   maij = (Mat_MPIAIJ*) (*mat)->data;
5679 
5680   (*mat)->preallocated = PETSC_TRUE;
5681 
5682   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5683   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5684 
5685   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5686   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5687 
5688   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692 
5693   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5696   PetscFunctionReturn(0);
5697 }
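
/*
   A construction sketch (illustrative values, shown for rank 0 of two
   processes; rank 1 supplies its own six arrays analogously).  The global
   matrix is 2x4; rank 0 owns row 0 and columns 0-1.  The diagonal-block
   column indices j[] are local (global index minus the column ownership
   start), while the off-diagonal column indices oj[] are global.

     PetscInt    i[]  = {0,2},j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1},oj[] = {2};
     PetscScalar oa[] = {3.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,2,2,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       ... use A; the six arrays must remain valid until A is destroyed ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/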
5698 
5699 /*
5700     Special version for direct calls from Fortran
5701 */
5702 #include <petsc-private/fortranimpl.h>
5703 
5704 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5705 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5706 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5707 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5708 #endif
5709 
5710 /* Change these macros so they can be used in a void function */
5711 #undef CHKERRQ
5712 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5713 #undef SETERRQ2
5714 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5715 #undef SETERRQ3
5716 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5717 #undef SETERRQ
5718 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5719 
5720 #undef __FUNCT__
5721 #define __FUNCT__ "matsetvaluesmpiaij_"
5722 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5723 {
5724   Mat            mat  = *mmat;
5725   PetscInt       m    = *mm, n = *mn;
5726   InsertMode     addv = *maddv;
5727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5728   PetscScalar    value;
5729   PetscErrorCode ierr;
5730 
5731   MatCheckPreallocated(mat,1);
5732   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5733 
5734 #if defined(PETSC_USE_DEBUG)
5735   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5736 #endif
5737   {
5738     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5739     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5740     PetscBool roworiented = aij->roworiented;
5741 
5742     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5743     Mat        A                 = aij->A;
5744     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5745     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5746     MatScalar  *aa               = a->a;
5747     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5748     Mat        B                 = aij->B;
5749     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5750     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5751     MatScalar  *ba               = b->a;
5752 
5753     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5754     PetscInt  nonew = a->nonew;
5755     MatScalar *ap1,*ap2;
5756 
5757     PetscFunctionBegin;
5758     for (i=0; i<m; i++) {
5759       if (im[i] < 0) continue;
5760 #if defined(PETSC_USE_DEBUG)
5761       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5762 #endif
5763       if (im[i] >= rstart && im[i] < rend) {
5764         row      = im[i] - rstart;
5765         lastcol1 = -1;
5766         rp1      = aj + ai[row];
5767         ap1      = aa + ai[row];
5768         rmax1    = aimax[row];
5769         nrow1    = ailen[row];
5770         low1     = 0;
5771         high1    = nrow1;
5772         lastcol2 = -1;
5773         rp2      = bj + bi[row];
5774         ap2      = ba + bi[row];
5775         rmax2    = bimax[row];
5776         nrow2    = bilen[row];
5777         low2     = 0;
5778         high2    = nrow2;
5779 
5780         for (j=0; j<n; j++) {
5781           if (roworiented) value = v[i*n+j];
5782           else value = v[i+j*m];
5783           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5784           if (in[j] >= cstart && in[j] < cend) {
5785             col = in[j] - cstart;
5786             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5787           } else if (in[j] < 0) continue;
5788 #if defined(PETSC_USE_DEBUG)
5789           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5790 #endif
5791           else {
5792             if (mat->was_assembled) {
5793               if (!aij->colmap) {
5794                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5795               }
5796 #if defined(PETSC_USE_CTABLE)
5797               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5798               col--;
5799 #else
5800               col = aij->colmap[in[j]] - 1;
5801 #endif
5802               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5803                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5804                 col  =  in[j];
5805                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5806                 B     = aij->B;
5807                 b     = (Mat_SeqAIJ*)B->data;
5808                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5809                 rp2   = bj + bi[row];
5810                 ap2   = ba + bi[row];
5811                 rmax2 = bimax[row];
5812                 nrow2 = bilen[row];
5813                 low2  = 0;
5814                 high2 = nrow2;
5815                 bm    = aij->B->rmap->n;
5816                 ba    = b->a;
5817               }
5818             } else col = in[j];
5819             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5820           }
5821         }
5822       } else if (!aij->donotstash) {
5823         if (roworiented) {
5824           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5825         } else {
5826           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5827         }
5828       }
5829     }
5830   }
5831   PetscFunctionReturnVoid();
5832 }
5833 
5834