xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7da1fb6e516b95fd231a3ef1ae9cb4a28f357e5e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine number diagonal and off-diagonal counts */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine number diagonal and off-diagonal counts */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it it is not scalable (each processor
350 has an order N integer array but is fast to acess.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {
526           if (roworiented) value = v[i*n+j];
527           else             value = v[i+j*m];
528         } else value = 0.0;
529         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
530         if (in[j] >= cstart && in[j] < cend) {
531           col   = in[j] - cstart;
532           nonew = a->nonew;
533           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
534         } else if (in[j] < 0) continue;
535 #if defined(PETSC_USE_DEBUG)
536         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
537 #endif
538         else {
539           if (mat->was_assembled) {
540             if (!aij->colmap) {
541               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
542             }
543 #if defined(PETSC_USE_CTABLE)
544             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
545             col--;
546 #else
547             col = aij->colmap[in[j]] - 1;
548 #endif
549             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
550               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
551               col  =  in[j];
552               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
553               B     = aij->B;
554               b     = (Mat_SeqAIJ*)B->data;
555               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
556               rp2   = bj + bi[row];
557               ap2   = ba + bi[row];
558               rmax2 = bimax[row];
559               nrow2 = bilen[row];
560               low2  = 0;
561               high2 = nrow2;
562               bm    = aij->B->rmap->n;
563               ba    = b->a;
564             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
565           } else col = in[j];
566           nonew = b->nonew;
567           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
568         }
569       }
570     } else {
571       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
572       if (!aij->donotstash) {
573         mat->assembled = PETSC_FALSE;
574         if (roworiented) {
575           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
576         } else {
577           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
578         }
579       }
580     }
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatGetValues_MPIAIJ"
587 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
588 {
589   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
590   PetscErrorCode ierr;
591   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
592   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
593 
594   PetscFunctionBegin;
595   for (i=0; i<m; i++) {
596     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
597     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
598     if (idxm[i] >= rstart && idxm[i] < rend) {
599       row = idxm[i] - rstart;
600       for (j=0; j<n; j++) {
601         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
602         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
603         if (idxn[j] >= cstart && idxn[j] < cend) {
604           col  = idxn[j] - cstart;
605           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
606         } else {
607           if (!aij->colmap) {
608             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609           }
610 #if defined(PETSC_USE_CTABLE)
611           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
612           col--;
613 #else
614           col = aij->colmap[idxn[j]] - 1;
615 #endif
616           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
617           else {
618             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
619           }
620         }
621       }
622     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
631 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
632 {
633   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
634   PetscErrorCode ierr;
635   PetscInt       nstash,reallocs;
636   InsertMode     addv;
637 
638   PetscFunctionBegin;
639   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
640 
641   /* make sure all processors are either in INSERTMODE or ADDMODE */
642   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
643   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
644   mat->insertmode = addv; /* in case this processor had no cache */
645 
646   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
647   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
648   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
654 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
655 {
656   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
657   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
658   PetscErrorCode ierr;
659   PetscMPIInt    n;
660   PetscInt       i,j,rstart,ncols,flg;
661   PetscInt       *row,*col;
662   PetscBool      other_disassembled;
663   PetscScalar    *val;
664   InsertMode     addv = mat->insertmode;
665 
666   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
667 
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n; ) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) {
677           if (row[j] != rstart) break;
678         }
679         if (j < n) ncols = j-i;
680         else       ncols = n-i;
681         /* Now assemble all these values with a single function call */
682         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
683 
684         i = j;
685       }
686     }
687     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
688   }
689   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
690   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
691 
692   /* determine if any processor has disassembled, if so we must
693      also disassemble ourselfs, in order that we may reassemble. */
694   /*
695      if nonzero structure of submatrix B cannot change then we know that
696      no processor disassembled thus we can skip this stuff
697   */
698   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
699     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
700     if (mat->was_assembled && !other_disassembled) {
701       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
702     }
703   }
704   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
705     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
706   }
707   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
708   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
709   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
710 
711   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
712 
713   aij->rowvalues = 0;
714 
715   /* used by MatAXPY() */
716   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
717   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
718 
719   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
720   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
721 
722   {
723     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
724     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
725   }
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
747   PetscInt      *owners = A->rmap->range;
748   PetscInt       n      = A->rmap->n;
749   PetscMPIInt    size   = mat->size;
750   PetscSF        sf;
751   PetscInt      *lrows;
752   PetscSFNode   *rrows;
753   PetscInt       lastidx = -1, r, p = 0, len = 0;
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   /* Create SF where leaves are input rows and roots are owned rows */
758   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
759   for (r = 0; r < n; ++r) lrows[r] = -1;
760   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
761   for (r = 0; r < N; ++r) {
762     const PetscInt idx   = rows[r];
763     PetscBool      found = PETSC_FALSE;
764     /* Trick for efficient searching for sorted rows */
765     if (lastidx > idx) p = 0;
766     lastidx = idx;
767     for (; p < size; ++p) {
768       if (idx >= owners[p] && idx < owners[p+1]) {
769         rrows[r].rank  = p;
770         rrows[r].index = rows[r] - owners[p];
771         found = PETSC_TRUE;
772         break;
773       }
774     }
775     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
776   }
777   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
778   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
779   /* Collect flags for rows to be zeroed */
780   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
781   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
782   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
783   /* Compress and put in row numbers */
784   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
792     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
793     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
794     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
795   }
796   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
797   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
798   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
799     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
800   } else if (diag != 0.0) {
801     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
802     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
803     for (r = 0; r < len; ++r) {
804       const PetscInt row = lrows[r] + A->rmap->rstart;
805       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
806     }
807     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
809   } else {
810     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
811   }
812   ierr = PetscFree(lrows);CHKERRQ(ierr);
813   {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
823 {
824   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
825   PetscErrorCode    ierr;
826   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
827   PetscInt          i,j,r,m,p = 0,len = 0;
828   PetscInt          *lrows,*owners = A->rmap->range;
829   PetscSFNode       *rrows;
830   PetscSF           sf;
831   const PetscScalar *xx;
832   PetscScalar       *bb,*mask;
833   Vec               xmask,lmask;
834   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
835   const PetscInt    *aj, *ii,*ridx;
836   PetscScalar       *aa;
837 #if defined(PETSC_DEBUG)
838   PetscBool found = PETSC_FALSE;
839 #endif
840 
841   PetscFunctionBegin;
842   /* Create SF where leaves are input rows and roots are owned rows */
843   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
844   for (r = 0; r < n; ++r) lrows[r] = -1;
845   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
846   for (r = 0; r < N; ++r) {
847     const PetscInt idx   = rows[r];
848     PetscBool      found = PETSC_FALSE;
849     /* Trick for efficient searching for sorted rows */
850     if (lastidx > idx) p = 0;
851     lastidx = idx;
852     for (; p < size; ++p) {
853       if (idx >= owners[p] && idx < owners[p+1]) {
854         rrows[r].rank  = p;
855         rrows[r].index = rows[r] - owners[p];
856         found = PETSC_TRUE;
857         break;
858       }
859     }
860     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
861   }
862   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
863   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
864   /* Collect flags for rows to be zeroed */
865   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
866   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
867   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
868   /* Compress and put in row numbers */
869   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
870   /* zero diagonal part of matrix */
871   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
872   /* handle off diagonal part of matrix */
873   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
874   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
875   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
876   for (i=0; i<len; i++) bb[lrows[i]] = 1;
877   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
878   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
881   if (x) {
882     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
885     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
886   }
887   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
888   /* remove zeroed rows of off diagonal matrix */
889   ii = aij->i;
890   for (i=0; i<len; i++) {
891     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
892   }
893   /* loop over all elements of off process part of matrix zeroing removed columns*/
894   if (aij->compressedrow.use) {
895     m    = aij->compressedrow.nrows;
896     ii   = aij->compressedrow.i;
897     ridx = aij->compressedrow.rindex;
898     for (i=0; i<m; i++) {
899       n  = ii[i+1] - ii[i];
900       aj = aij->j + ii[i];
901       aa = aij->a + ii[i];
902 
903       for (j=0; j<n; j++) {
904         if (PetscAbsScalar(mask[*aj])) {
905           if (b) bb[*ridx] -= *aa*xx[*aj];
906           *aa = 0.0;
907         }
908         aa++;
909         aj++;
910       }
911       ridx++;
912     }
913   } else { /* do not use compressed row format */
914     m = l->B->rmap->n;
915     for (i=0; i<m; i++) {
916       n  = ii[i+1] - ii[i];
917       aj = aij->j + ii[i];
918       aa = aij->a + ii[i];
919       for (j=0; j<n; j++) {
920         if (PetscAbsScalar(mask[*aj])) {
921           if (b) bb[i] -= *aa*xx[*aj];
922           *aa = 0.0;
923         }
924         aa++;
925         aj++;
926       }
927     }
928   }
929   if (x) {
930     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
931     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
932   }
933   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
934   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
935   ierr = PetscFree(lrows);CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatMult_MPIAIJ"
941 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
942 {
943   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
944   PetscErrorCode ierr;
945   PetscInt       nt;
946 
947   PetscFunctionBegin;
948   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
949   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
950   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 #undef __FUNCT__
958 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
959 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultAdd_MPIAIJ"
971 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
979   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
986 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscBool      merged;
991 
992   PetscFunctionBegin;
993   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
994   /* do nondiagonal part */
995   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
996   if (!merged) {
997     /* send it on its way */
998     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999     /* do local part */
1000     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1001     /* receive remote parts: note this assumes the values are not actually */
1002     /* added in yy until the next line, */
1003     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004   } else {
1005     /* do local part */
1006     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007     /* send it on its way */
1008     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009     /* values actually were received in the Begin() but we need to call this nop */
1010     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1017 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1018 {
1019   MPI_Comm       comm;
1020   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1021   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1022   IS             Me,Notme;
1023   PetscErrorCode ierr;
1024   PetscInt       M,N,first,last,*notme,i;
1025   PetscMPIInt    size;
1026 
1027   PetscFunctionBegin;
1028   /* Easy test: symmetric diagonal block */
1029   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1030   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1031   if (!*f) PetscFunctionReturn(0);
1032   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1034   if (size == 1) PetscFunctionReturn(0);
1035 
1036   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1037   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1038   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1039   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1040   for (i=0; i<first; i++) notme[i] = i;
1041   for (i=last; i<M; i++) notme[i-last+first] = i;
1042   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1043   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1044   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1045   Aoff = Aoffs[0];
1046   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1047   Boff = Boffs[0];
1048   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1049   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1050   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1051   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1052   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1053   ierr = PetscFree(notme);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1060 {
1061   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   /* do nondiagonal part */
1066   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1067   /* send it on its way */
1068   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1069   /* do local part */
1070   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1071   /* receive remote parts */
1072   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 /*
1077   This only works correctly for square matrices where the subblock A->A is the
1078    diagonal block
1079 */
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1082 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1083 {
1084   PetscErrorCode ierr;
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086 
1087   PetscFunctionBegin;
1088   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1089   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1090   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatScale_MPIAIJ"
1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1103   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatDestroy_MPIAIJ"
1109 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1110 {
1111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115 #if defined(PETSC_USE_LOG)
1116   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1117 #endif
1118   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1119   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1120   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1121   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1122 #if defined(PETSC_USE_CTABLE)
1123   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1124 #else
1125   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1126 #endif
1127   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1129   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1130   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1131   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1132   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1133 
1134   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1148 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1149 {
1150   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1151   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1152   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1153   PetscErrorCode ierr;
1154   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1155   int            fd;
1156   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1157   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1158   PetscScalar    *column_values;
1159   PetscInt       message_count,flowcontrolcount;
1160   FILE           *file;
1161 
1162   PetscFunctionBegin;
1163   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1164   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1165   nz   = A->nz + B->nz;
1166   if (!rank) {
1167     header[0] = MAT_FILE_CLASSID;
1168     header[1] = mat->rmap->N;
1169     header[2] = mat->cmap->N;
1170 
1171     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1173     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     /* get largest number of rows any processor has */
1175     rlen  = mat->rmap->n;
1176     range = mat->rmap->range;
1177     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1178   } else {
1179     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     rlen = mat->rmap->n;
1181   }
1182 
1183   /* load up the local row counts */
1184   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1185   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1186 
1187   /* store the row lengths to the file */
1188   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1189   if (!rank) {
1190     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     for (i=1; i<size; i++) {
1192       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1193       rlen = range[i+1] - range[i];
1194       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1196     }
1197     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1198   } else {
1199     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1200     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1201     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1202   }
1203   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1204 
1205   /* load up the local column indices */
1206   nzmax = nz; /* th processor needs space a largest processor needs */
1207   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1208   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1209   cnt   = 0;
1210   for (i=0; i<mat->rmap->n; i++) {
1211     for (j=B->i[i]; j<B->i[i+1]; j++) {
1212       if ((col = garray[B->j[j]]) > cstart) break;
1213       column_indices[cnt++] = col;
1214     }
1215     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1216     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1217   }
1218   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1219 
1220   /* store the column indices to the file */
1221   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1222   if (!rank) {
1223     MPI_Status status;
1224     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1227       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1228       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1229       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     }
1232     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1233   } else {
1234     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1235     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1238   }
1239   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1240 
1241   /* load up the local column values */
1242   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1243   cnt  = 0;
1244   for (i=0; i<mat->rmap->n; i++) {
1245     for (j=B->i[i]; j<B->i[i+1]; j++) {
1246       if (garray[B->j[j]] > cstart) break;
1247       column_values[cnt++] = B->a[j];
1248     }
1249     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1250     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1251   }
1252   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1253 
1254   /* store the column values to the file */
1255   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1256   if (!rank) {
1257     MPI_Status status;
1258     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1259     for (i=1; i<size; i++) {
1260       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1261       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1262       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1263       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1264       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1267   } else {
1268     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1269     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1272   }
1273   ierr = PetscFree(column_values);CHKERRQ(ierr);
1274 
1275   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1276   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #include <petscdraw.h>
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank = aij->rank,size = aij->size;
1288   PetscBool         isdraw,iascii,isbinary;
1289   PetscViewer       sviewer;
1290   PetscViewerFormat format;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1296   if (iascii) {
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1299       MatInfo   info;
1300       PetscBool inodes;
1301 
1302       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1303       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1304       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1306       if (!inodes) {
1307         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1308                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1309       } else {
1310         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1311                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1312       }
1313       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1315       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1317       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1318       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1320       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1323       PetscInt inodecount,inodelimit,*inodes;
1324       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1325       if (inodes) {
1326         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1327       } else {
1328         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1329       }
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1332       PetscFunctionReturn(0);
1333     }
1334   } else if (isbinary) {
1335     if (size == 1) {
1336       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1337       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1340     }
1341     PetscFunctionReturn(0);
1342   } else if (isdraw) {
1343     PetscDraw draw;
1344     PetscBool isnull;
1345     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1346     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1347   }
1348 
1349   {
1350     /* assemble the entire matrix onto first processor. */
1351     Mat        A;
1352     Mat_SeqAIJ *Aloc;
1353     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1354     MatScalar  *a;
1355 
1356     if (mat->rmap->N > 1024) {
1357       PetscBool flg = PETSC_FALSE;
1358 
1359       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1360       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1361     }
1362 
1363     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1364     if (!rank) {
1365       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1366     } else {
1367       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1368     }
1369     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1370     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1371     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1372     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1373     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1374 
1375     /* copy over the A part */
1376     Aloc = (Mat_SeqAIJ*)aij->A->data;
1377     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1378     row  = mat->rmap->rstart;
1379     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1380     for (i=0; i<m; i++) {
1381       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1382       row++;
1383       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1384     }
1385     aj = Aloc->j;
1386     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1387 
1388     /* copy over the B part */
1389     Aloc = (Mat_SeqAIJ*)aij->B->data;
1390     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1391     row  = mat->rmap->rstart;
1392     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1393     ct   = cols;
1394     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1395     for (i=0; i<m; i++) {
1396       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1397       row++;
1398       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1399     }
1400     ierr = PetscFree(ct);CHKERRQ(ierr);
1401     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1403     /*
1404        Everyone has to call to draw the matrix since the graphics waits are
1405        synchronized across all processors that share the PetscDraw object
1406     */
1407     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1408     if (!rank) {
1409       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1410     }
1411     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1412     ierr = MatDestroy(&A);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatView_MPIAIJ"
1419 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1420 {
1421   PetscErrorCode ierr;
1422   PetscBool      iascii,isdraw,issocket,isbinary;
1423 
1424   PetscFunctionBegin;
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1426   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1427   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1428   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1429   if (iascii || isdraw || isbinary || issocket) {
1430     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatSOR_MPIAIJ"
1437 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1438 {
1439   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1440   PetscErrorCode ierr;
1441   Vec            bb1 = 0;
1442   PetscBool      hasop;
1443 
1444   PetscFunctionBegin;
1445   if (flag == SOR_APPLY_UPPER) {
1446     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1447     PetscFunctionReturn(0);
1448   }
1449 
1450   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1451     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1452   }
1453 
1454   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1455     if (flag & SOR_ZERO_INITIAL_GUESS) {
1456       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1457       its--;
1458     }
1459 
1460     while (its--) {
1461       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463 
1464       /* update rhs: bb1 = bb - B*x */
1465       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1466       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1467 
1468       /* local sweep */
1469       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1470     }
1471   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1472     if (flag & SOR_ZERO_INITIAL_GUESS) {
1473       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1474       its--;
1475     }
1476     while (its--) {
1477       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479 
1480       /* update rhs: bb1 = bb - B*x */
1481       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1482       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1483 
1484       /* local sweep */
1485       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1486     }
1487   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1488     if (flag & SOR_ZERO_INITIAL_GUESS) {
1489       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490       its--;
1491     }
1492     while (its--) {
1493       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495 
1496       /* update rhs: bb1 = bb - B*x */
1497       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1498       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1499 
1500       /* local sweep */
1501       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1502     }
1503   } else if (flag & SOR_EISENSTAT) {
1504     Vec xx1;
1505 
1506     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1507     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1508 
1509     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511     if (!mat->diag) {
1512       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1513       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1514     }
1515     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1516     if (hasop) {
1517       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1518     } else {
1519       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1520     }
1521     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1522 
1523     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1524 
1525     /* local sweep */
1526     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1527     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1528     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1529   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1530 
1531   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatPermute_MPIAIJ"
1537 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1538 {
1539   Mat            aA,aB,Aperm;
1540   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1541   PetscScalar    *aa,*ba;
1542   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1543   PetscSF        rowsf,sf;
1544   IS             parcolp = NULL;
1545   PetscBool      done;
1546   PetscErrorCode ierr;
1547 
1548   PetscFunctionBegin;
1549   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1550   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1551   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1552   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1553 
1554   /* Invert row permutation to find out where my rows should go */
1555   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1556   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1557   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1559   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1560   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1561 
1562   /* Invert column permutation to find out where my columns should go */
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1566   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1567   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1570 
1571   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1572   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1573   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1574 
1575   /* Find out where my gcols should go */
1576   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1577   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1578   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1579   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1581   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1583   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1584 
1585   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1586   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1587   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) {
1589     PetscInt row = rdest[i],rowner;
1590     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1591     for (j=ai[i]; j<ai[i+1]; j++) {
1592       PetscInt cowner,col = cdest[aj[j]];
1593       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1594       if (rowner == cowner) dnnz[i]++;
1595       else onnz[i]++;
1596     }
1597     for (j=bi[i]; j<bi[i+1]; j++) {
1598       PetscInt cowner,col = gcdest[bj[j]];
1599       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1600       if (rowner == cowner) dnnz[i]++;
1601       else onnz[i]++;
1602     }
1603   }
1604   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1606   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1607   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1608   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1609 
1610   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1611   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1612   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1613   for (i=0; i<m; i++) {
1614     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1615     PetscInt j0,rowlen;
1616     rowlen = ai[i+1] - ai[i];
1617     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1618       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1619       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1620     }
1621     rowlen = bi[i+1] - bi[i];
1622     for (j0=j=0; j<rowlen; j0=j) {
1623       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1624       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1625     }
1626   }
1627   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1630   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1631   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1632   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1633   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1634   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1635   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1636   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1637   *B = Aperm;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1643 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1644 {
1645   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1646   Mat            A    = mat->A,B = mat->B;
1647   PetscErrorCode ierr;
1648   PetscReal      isend[5],irecv[5];
1649 
1650   PetscFunctionBegin;
1651   info->block_size = 1.0;
1652   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1653 
1654   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1655   isend[3] = info->memory;  isend[4] = info->mallocs;
1656 
1657   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1658 
1659   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1660   isend[3] += info->memory;  isend[4] += info->mallocs;
1661   if (flag == MAT_LOCAL) {
1662     info->nz_used      = isend[0];
1663     info->nz_allocated = isend[1];
1664     info->nz_unneeded  = isend[2];
1665     info->memory       = isend[3];
1666     info->mallocs      = isend[4];
1667   } else if (flag == MAT_GLOBAL_MAX) {
1668     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1669 
1670     info->nz_used      = irecv[0];
1671     info->nz_allocated = irecv[1];
1672     info->nz_unneeded  = irecv[2];
1673     info->memory       = irecv[3];
1674     info->mallocs      = irecv[4];
1675   } else if (flag == MAT_GLOBAL_SUM) {
1676     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1677 
1678     info->nz_used      = irecv[0];
1679     info->nz_allocated = irecv[1];
1680     info->nz_unneeded  = irecv[2];
1681     info->memory       = irecv[3];
1682     info->mallocs      = irecv[4];
1683   }
1684   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1685   info->fill_ratio_needed = 0;
1686   info->factor_mallocs    = 0;
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatSetOption_MPIAIJ"
1692 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1693 {
1694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1695   PetscErrorCode ierr;
1696 
1697   PetscFunctionBegin;
1698   switch (op) {
1699   case MAT_NEW_NONZERO_LOCATIONS:
1700   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1701   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1702   case MAT_KEEP_NONZERO_PATTERN:
1703   case MAT_NEW_NONZERO_LOCATION_ERR:
1704   case MAT_USE_INODES:
1705   case MAT_IGNORE_ZERO_ENTRIES:
1706     MatCheckPreallocated(A,1);
1707     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1708     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1709     break;
1710   case MAT_ROW_ORIENTED:
1711     a->roworiented = flg;
1712 
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_NEW_DIAGONALS:
1717     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1718     break;
1719   case MAT_IGNORE_OFF_PROC_ENTRIES:
1720     a->donotstash = flg;
1721     break;
1722   case MAT_SPD:
1723     A->spd_set = PETSC_TRUE;
1724     A->spd     = flg;
1725     if (flg) {
1726       A->symmetric                  = PETSC_TRUE;
1727       A->structurally_symmetric     = PETSC_TRUE;
1728       A->symmetric_set              = PETSC_TRUE;
1729       A->structurally_symmetric_set = PETSC_TRUE;
1730     }
1731     break;
1732   case MAT_SYMMETRIC:
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_STRUCTURALLY_SYMMETRIC:
1736     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1737     break;
1738   case MAT_HERMITIAN:
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     break;
1741   case MAT_SYMMETRY_ETERNAL:
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   default:
1745     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatGetRow_MPIAIJ"
1752 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1753 {
1754   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1755   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1756   PetscErrorCode ierr;
1757   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1758   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1759   PetscInt       *cmap,*idx_p;
1760 
1761   PetscFunctionBegin;
1762   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1763   mat->getrowactive = PETSC_TRUE;
1764 
1765   if (!mat->rowvalues && (idx || v)) {
1766     /*
1767         allocate enough space to hold information from the longest row.
1768     */
1769     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1770     PetscInt   max = 1,tmp;
1771     for (i=0; i<matin->rmap->n; i++) {
1772       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1773       if (max < tmp) max = tmp;
1774     }
1775     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1776   }
1777 
1778   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1779   lrow = row - rstart;
1780 
1781   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1782   if (!v)   {pvA = 0; pvB = 0;}
1783   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1784   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1785   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1786   nztot = nzA + nzB;
1787 
1788   cmap = mat->garray;
1789   if (v  || idx) {
1790     if (nztot) {
1791       /* Sort by increasing column numbers, assuming A and B already sorted */
1792       PetscInt imark = -1;
1793       if (v) {
1794         *v = v_p = mat->rowvalues;
1795         for (i=0; i<nzB; i++) {
1796           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1797           else break;
1798         }
1799         imark = i;
1800         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1801         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1802       }
1803       if (idx) {
1804         *idx = idx_p = mat->rowindices;
1805         if (imark > -1) {
1806           for (i=0; i<imark; i++) {
1807             idx_p[i] = cmap[cworkB[i]];
1808           }
1809         } else {
1810           for (i=0; i<nzB; i++) {
1811             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1812             else break;
1813           }
1814           imark = i;
1815         }
1816         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1817         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1818       }
1819     } else {
1820       if (idx) *idx = 0;
1821       if (v)   *v   = 0;
1822     }
1823   }
1824   *nz  = nztot;
1825   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1826   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1832 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1833 {
1834   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1835 
1836   PetscFunctionBegin;
1837   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1838   aij->getrowactive = PETSC_FALSE;
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatNorm_MPIAIJ"
1844 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1845 {
1846   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1847   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1848   PetscErrorCode ierr;
1849   PetscInt       i,j,cstart = mat->cmap->rstart;
1850   PetscReal      sum = 0.0;
1851   MatScalar      *v;
1852 
1853   PetscFunctionBegin;
1854   if (aij->size == 1) {
1855     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1856   } else {
1857     if (type == NORM_FROBENIUS) {
1858       v = amat->a;
1859       for (i=0; i<amat->nz; i++) {
1860         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1861       }
1862       v = bmat->a;
1863       for (i=0; i<bmat->nz; i++) {
1864         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1865       }
1866       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1867       *norm = PetscSqrtReal(*norm);
1868     } else if (type == NORM_1) { /* max column norm */
1869       PetscReal *tmp,*tmp2;
1870       PetscInt  *jj,*garray = aij->garray;
1871       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1872       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1873       *norm = 0.0;
1874       v     = amat->a; jj = amat->j;
1875       for (j=0; j<amat->nz; j++) {
1876         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1877       }
1878       v = bmat->a; jj = bmat->j;
1879       for (j=0; j<bmat->nz; j++) {
1880         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1881       }
1882       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1883       for (j=0; j<mat->cmap->N; j++) {
1884         if (tmp2[j] > *norm) *norm = tmp2[j];
1885       }
1886       ierr = PetscFree(tmp);CHKERRQ(ierr);
1887       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1888     } else if (type == NORM_INFINITY) { /* max row norm */
1889       PetscReal ntemp = 0.0;
1890       for (j=0; j<aij->A->rmap->n; j++) {
1891         v   = amat->a + amat->i[j];
1892         sum = 0.0;
1893         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1894           sum += PetscAbsScalar(*v); v++;
1895         }
1896         v = bmat->a + bmat->i[j];
1897         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1898           sum += PetscAbsScalar(*v); v++;
1899         }
1900         if (sum > ntemp) ntemp = sum;
1901       }
1902       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1903     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatTranspose_MPIAIJ"
1910 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1911 {
1912   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1913   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1914   PetscErrorCode ierr;
1915   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1916   PetscInt       cstart = A->cmap->rstart,ncol;
1917   Mat            B;
1918   MatScalar      *array;
1919 
1920   PetscFunctionBegin;
1921   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1922 
1923   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1924   ai = Aloc->i; aj = Aloc->j;
1925   bi = Bloc->i; bj = Bloc->j;
1926   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1927     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1928     PetscSFNode          *oloc;
1929     PETSC_UNUSED PetscSF sf;
1930 
1931     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1932     /* compute d_nnz for preallocation */
1933     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<ai[ma]; i++) {
1935       d_nnz[aj[i]]++;
1936       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1937     }
1938     /* compute local off-diagonal contributions */
1939     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1940     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1941     /* map those to global */
1942     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1943     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1944     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1945     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1946     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1947     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1948     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1949 
1950     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1951     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1952     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1953     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1954     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1955     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1956   } else {
1957     B    = *matout;
1958     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1959     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1960   }
1961 
1962   /* copy over the A part */
1963   array = Aloc->a;
1964   row   = A->rmap->rstart;
1965   for (i=0; i<ma; i++) {
1966     ncol = ai[i+1]-ai[i];
1967     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1968     row++;
1969     array += ncol; aj += ncol;
1970   }
1971   aj = Aloc->j;
1972   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1973 
1974   /* copy over the B part */
1975   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1976   array = Bloc->a;
1977   row   = A->rmap->rstart;
1978   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1979   cols_tmp = cols;
1980   for (i=0; i<mb; i++) {
1981     ncol = bi[i+1]-bi[i];
1982     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1983     row++;
1984     array += ncol; cols_tmp += ncol;
1985   }
1986   ierr = PetscFree(cols);CHKERRQ(ierr);
1987 
1988   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1990   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1991     *matout = B;
1992   } else {
1993     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2000 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2001 {
2002   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2003   Mat            a    = aij->A,b = aij->B;
2004   PetscErrorCode ierr;
2005   PetscInt       s1,s2,s3;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2009   if (rr) {
2010     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2011     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2012     /* Overlap communication with computation. */
2013     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2014   }
2015   if (ll) {
2016     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2017     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2018     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2019   }
2020   /* scale  the diagonal block */
2021   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2022 
2023   if (rr) {
2024     /* Do a scatter end and then right scale the off-diagonal block */
2025     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2026     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2033 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2034 {
2035   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2036   PetscErrorCode ierr;
2037 
2038   PetscFunctionBegin;
2039   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #undef __FUNCT__
2044 #define __FUNCT__ "MatEqual_MPIAIJ"
2045 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2046 {
2047   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2048   Mat            a,b,c,d;
2049   PetscBool      flg;
2050   PetscErrorCode ierr;
2051 
2052   PetscFunctionBegin;
2053   a = matA->A; b = matA->B;
2054   c = matB->A; d = matB->B;
2055 
2056   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2057   if (flg) {
2058     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2059   }
2060   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatCopy_MPIAIJ"
2066 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2067 {
2068   PetscErrorCode ierr;
2069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2070   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2071 
2072   PetscFunctionBegin;
2073   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2074   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2075     /* because of the column compression in the off-processor part of the matrix a->B,
2076        the number of columns in a->B and b->B may be different, hence we cannot call
2077        the MatCopy() directly on the two parts. If need be, we can provide a more
2078        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2079        then copying the submatrices */
2080     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2081   } else {
2082     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2083     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatSetUp_MPIAIJ"
2090 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2091 {
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2101 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2102 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2103 {
2104   PetscInt       i,m=Y->rmap->N;
2105   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2106   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2107   const PetscInt *xi = x->i,*yi = y->i;
2108 
2109   PetscFunctionBegin;
2110   /* Set the number of nonzeros in the new matrix */
2111   for (i=0; i<m; i++) {
2112     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2113     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2114     nnz[i] = 0;
2115     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2116       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2117       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2118       nnz[i]++;
2119     }
2120     for (; k<nzy; k++) nnz[i]++;
2121   }
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 #undef __FUNCT__
2126 #define __FUNCT__ "MatAXPY_MPIAIJ"
2127 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2128 {
2129   PetscErrorCode ierr;
2130   PetscInt       i;
2131   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2132   PetscBLASInt   bnz,one=1;
2133   Mat_SeqAIJ     *x,*y;
2134 
2135   PetscFunctionBegin;
2136   if (str == SAME_NONZERO_PATTERN) {
2137     PetscScalar alpha = a;
2138     x    = (Mat_SeqAIJ*)xx->A->data;
2139     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2140     y    = (Mat_SeqAIJ*)yy->A->data;
2141     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2142     x    = (Mat_SeqAIJ*)xx->B->data;
2143     y    = (Mat_SeqAIJ*)yy->B->data;
2144     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2145     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2146   } else if (str == SUBSET_NONZERO_PATTERN) {
2147     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2148 
2149     x = (Mat_SeqAIJ*)xx->B->data;
2150     y = (Mat_SeqAIJ*)yy->B->data;
2151     if (y->xtoy && y->XtoY != xx->B) {
2152       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2153       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2154     }
2155     if (!y->xtoy) { /* get xtoy */
2156       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2157       y->XtoY = xx->B;
2158       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2159     }
2160     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2161   } else {
2162     Mat      B;
2163     PetscInt *nnz_d,*nnz_o;
2164     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2165     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2166     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2167     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2168     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2169     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2170     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2171     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2172     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2173     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2174     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2175     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2176     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2177     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2178   }
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2183 
2184 #undef __FUNCT__
2185 #define __FUNCT__ "MatConjugate_MPIAIJ"
2186 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2187 {
2188 #if defined(PETSC_USE_COMPLEX)
2189   PetscErrorCode ierr;
2190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2191 
2192   PetscFunctionBegin;
2193   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2194   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2195 #else
2196   PetscFunctionBegin;
2197 #endif
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatRealPart_MPIAIJ"
2203 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2210   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2216 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2217 {
2218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2223   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #if defined(PETSC_HAVE_PBGL)
2228 
2229 #include <boost/parallel/mpi/bsp_process_group.hpp>
2230 #include <boost/graph/distributed/ilu_default_graph.hpp>
2231 #include <boost/graph/distributed/ilu_0_block.hpp>
2232 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2233 #include <boost/graph/distributed/petsc/interface.hpp>
2234 #include <boost/multi_array.hpp>
2235 #include <boost/parallel/distributed_property_map->hpp>
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2239 /*
2240   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2241 */
2242 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2243 {
2244   namespace petsc = boost::distributed::petsc;
2245 
2246   namespace graph_dist = boost::graph::distributed;
2247   using boost::graph::distributed::ilu_default::process_group_type;
2248   using boost::graph::ilu_permuted;
2249 
2250   PetscBool      row_identity, col_identity;
2251   PetscContainer c;
2252   PetscInt       m, n, M, N;
2253   PetscErrorCode ierr;
2254 
2255   PetscFunctionBegin;
2256   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2257   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2258   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2259   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2260 
2261   process_group_type pg;
2262   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2263   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2264   lgraph_type& level_graph = *lgraph_p;
2265   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2266 
2267   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2268   ilu_permuted(level_graph);
2269 
2270   /* put together the new matrix */
2271   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2272   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2273   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2274   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2275   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2276   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2277   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2278   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2279 
2280   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2281   ierr = PetscContainerSetPointer(c, lgraph_p);
2282   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2283   ierr = PetscContainerDestroy(&c);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2289 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2290 {
2291   PetscFunctionBegin;
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatSolve_MPIAIJ"
2297 /*
2298   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2299 */
2300 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2301 {
2302   namespace graph_dist = boost::graph::distributed;
2303 
2304   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2305   lgraph_type    *lgraph_p;
2306   PetscContainer c;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2311   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2312   ierr = VecCopy(b, x);CHKERRQ(ierr);
2313 
2314   PetscScalar *array_x;
2315   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2316   PetscInt sx;
2317   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2318 
2319   PetscScalar *array_b;
2320   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2321   PetscInt sb;
2322   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2323 
2324   lgraph_type& level_graph = *lgraph_p;
2325   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2326 
2327   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2328   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2329   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2330 
2331   typedef boost::iterator_property_map<array_ref_type::iterator,
2332                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2333   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2334   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2335 
2336   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2337   PetscFunctionReturn(0);
2338 }
2339 #endif
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatDestroy_MatRedundant"
2343 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2344 {
2345   PetscErrorCode ierr;
2346   Mat_Redundant  *redund;
2347   PetscInt       i;
2348   PetscMPIInt    size;
2349 
2350   PetscFunctionBegin;
2351   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2352   if (size == 1) {
2353     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2354     redund = a->redundant;
2355   } else {
2356     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2357     redund = a->redundant;
2358   }
2359   if (redund){
2360     if (redund->matseq) { /* via MatGetSubMatrices()  */
2361       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2362       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2363       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2364       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2365     } else {
2366       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2367       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2368       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2369       for (i=0; i<redund->nrecvs; i++) {
2370         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2371         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2372       }
2373       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2374     }
2375 
2376     if (redund->psubcomm) {
2377       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2378     }
2379     ierr = redund->Destroy(A);CHKERRQ(ierr);
2380     ierr = PetscFree(redund);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2387 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2388 {
2389   PetscMPIInt    rank,size;
2390   MPI_Comm       comm;
2391   PetscErrorCode ierr;
2392   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2393   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2394   PetscInt       *rowrange = mat->rmap->range;
2395   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2396   Mat            A = aij->A,B=aij->B,C=*matredundant;
2397   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2398   PetscScalar    *sbuf_a;
2399   PetscInt       nzlocal=a->nz+b->nz;
2400   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2401   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2402   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2403   MatScalar      *aworkA,*aworkB;
2404   PetscScalar    *vals;
2405   PetscMPIInt    tag1,tag2,tag3,imdex;
2406   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2407   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2408   MPI_Status     recv_status,*send_status;
2409   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2410   PetscInt       **rbuf_j=NULL;
2411   PetscScalar    **rbuf_a=NULL;
2412   Mat_Redundant  *redund =NULL;
2413 
2414   PetscFunctionBegin;
2415   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2416   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2417   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2418   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2419   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2420 
2421   if (reuse == MAT_REUSE_MATRIX) {
2422     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2423     if (subsize == 1) {
2424       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2425       redund = c->redundant;
2426     } else {
2427       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2428       redund = c->redundant;
2429     }
2430     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2431 
2432     nsends    = redund->nsends;
2433     nrecvs    = redund->nrecvs;
2434     send_rank = redund->send_rank;
2435     recv_rank = redund->recv_rank;
2436     sbuf_nz   = redund->sbuf_nz;
2437     rbuf_nz   = redund->rbuf_nz;
2438     sbuf_j    = redund->sbuf_j;
2439     sbuf_a    = redund->sbuf_a;
2440     rbuf_j    = redund->rbuf_j;
2441     rbuf_a    = redund->rbuf_a;
2442   }
2443 
2444   if (reuse == MAT_INITIAL_MATRIX) {
2445     PetscInt    nleftover,np_subcomm;
2446 
2447     /* get the destination processors' id send_rank, nsends and nrecvs */
2448     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2449 
2450     np_subcomm = size/nsubcomm;
2451     nleftover  = size - nsubcomm*np_subcomm;
2452 
2453     /* block of codes below is specific for INTERLACED */
2454     /* ------------------------------------------------*/
2455     nsends = 0; nrecvs = 0;
2456     for (i=0; i<size; i++) {
2457       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2458         send_rank[nsends++] = i;
2459         recv_rank[nrecvs++] = i;
2460       }
2461     }
2462     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2463       i = size-nleftover-1;
2464       j = 0;
2465       while (j < nsubcomm - nleftover) {
2466         send_rank[nsends++] = i;
2467         i--; j++;
2468       }
2469     }
2470 
2471     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2472       for (i=0; i<nleftover; i++) {
2473         recv_rank[nrecvs++] = size-nleftover+i;
2474       }
2475     }
2476     /*----------------------------------------------*/
2477 
2478     /* allocate sbuf_j, sbuf_a */
2479     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2480     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2481     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2482     /*
2483     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2484     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2485      */
2486   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2487 
2488   /* copy mat's local entries into the buffers */
2489   if (reuse == MAT_INITIAL_MATRIX) {
2490     rownz_max = 0;
2491     rptr      = sbuf_j;
2492     cols      = sbuf_j + rend-rstart + 1;
2493     vals      = sbuf_a;
2494     rptr[0]   = 0;
2495     for (i=0; i<rend-rstart; i++) {
2496       row    = i + rstart;
2497       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2498       ncols  = nzA + nzB;
2499       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2500       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2501       /* load the column indices for this row into cols */
2502       lwrite = 0;
2503       for (l=0; l<nzB; l++) {
2504         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2505           vals[lwrite]   = aworkB[l];
2506           cols[lwrite++] = ctmp;
2507         }
2508       }
2509       for (l=0; l<nzA; l++) {
2510         vals[lwrite]   = aworkA[l];
2511         cols[lwrite++] = cstart + cworkA[l];
2512       }
2513       for (l=0; l<nzB; l++) {
2514         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2515           vals[lwrite]   = aworkB[l];
2516           cols[lwrite++] = ctmp;
2517         }
2518       }
2519       vals     += ncols;
2520       cols     += ncols;
2521       rptr[i+1] = rptr[i] + ncols;
2522       if (rownz_max < ncols) rownz_max = ncols;
2523     }
2524     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2525   } else { /* only copy matrix values into sbuf_a */
2526     rptr    = sbuf_j;
2527     vals    = sbuf_a;
2528     rptr[0] = 0;
2529     for (i=0; i<rend-rstart; i++) {
2530       row    = i + rstart;
2531       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2532       ncols  = nzA + nzB;
2533       cworkB = b->j + b->i[i];
2534       aworkA = a->a + a->i[i];
2535       aworkB = b->a + b->i[i];
2536       lwrite = 0;
2537       for (l=0; l<nzB; l++) {
2538         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2539       }
2540       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2541       for (l=0; l<nzB; l++) {
2542         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2543       }
2544       vals     += ncols;
2545       rptr[i+1] = rptr[i] + ncols;
2546     }
2547   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2548 
2549   /* send nzlocal to others, and recv other's nzlocal */
2550   /*--------------------------------------------------*/
2551   if (reuse == MAT_INITIAL_MATRIX) {
2552     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2553 
2554     s_waits2 = s_waits3 + nsends;
2555     s_waits1 = s_waits2 + nsends;
2556     r_waits1 = s_waits1 + nsends;
2557     r_waits2 = r_waits1 + nrecvs;
2558     r_waits3 = r_waits2 + nrecvs;
2559   } else {
2560     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2561 
2562     r_waits3 = s_waits3 + nsends;
2563   }
2564 
2565   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2566   if (reuse == MAT_INITIAL_MATRIX) {
2567     /* get new tags to keep the communication clean */
2568     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2569     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2570     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2571 
2572     /* post receives of other's nzlocal */
2573     for (i=0; i<nrecvs; i++) {
2574       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2575     }
2576     /* send nzlocal to others */
2577     for (i=0; i<nsends; i++) {
2578       sbuf_nz[i] = nzlocal;
2579       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2580     }
2581     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2582     count = nrecvs;
2583     while (count) {
2584       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2585 
2586       recv_rank[imdex] = recv_status.MPI_SOURCE;
2587       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2588       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2589 
2590       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2591 
2592       rbuf_nz[imdex] += i + 2;
2593 
2594       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2595       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2596       count--;
2597     }
2598     /* wait on sends of nzlocal */
2599     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2600     /* send mat->i,j to others, and recv from other's */
2601     /*------------------------------------------------*/
2602     for (i=0; i<nsends; i++) {
2603       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2604       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2605     }
2606     /* wait on receives of mat->i,j */
2607     /*------------------------------*/
2608     count = nrecvs;
2609     while (count) {
2610       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2611       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2612       count--;
2613     }
2614     /* wait on sends of mat->i,j */
2615     /*---------------------------*/
2616     if (nsends) {
2617       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2618     }
2619   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2620 
2621   /* post receives, send and receive mat->a */
2622   /*----------------------------------------*/
2623   for (imdex=0; imdex<nrecvs; imdex++) {
2624     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2625   }
2626   for (i=0; i<nsends; i++) {
2627     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2628   }
2629   count = nrecvs;
2630   while (count) {
2631     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2632     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2633     count--;
2634   }
2635   if (nsends) {
2636     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2637   }
2638 
2639   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2640 
2641   /* create redundant matrix */
2642   /*-------------------------*/
2643   if (reuse == MAT_INITIAL_MATRIX) {
2644     const PetscInt *range;
2645     PetscInt       rstart_sub,rend_sub,mloc_sub;
2646 
2647     /* compute rownz_max for preallocation */
2648     for (imdex=0; imdex<nrecvs; imdex++) {
2649       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2650       rptr = rbuf_j[imdex];
2651       for (i=0; i<j; i++) {
2652         ncols = rptr[i+1] - rptr[i];
2653         if (rownz_max < ncols) rownz_max = ncols;
2654       }
2655     }
2656 
2657     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2658 
2659     /* get local size of redundant matrix
2660        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2661     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2662     rstart_sub = range[nsubcomm*subrank];
2663     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2664       rend_sub = range[nsubcomm*(subrank+1)];
2665     } else {
2666       rend_sub = mat->rmap->N;
2667     }
2668     mloc_sub = rend_sub - rstart_sub;
2669 
2670     if (M == N) {
2671       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2672     } else { /* non-square matrix */
2673       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2674     }
2675     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2676     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2677     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2678     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2679   } else {
2680     C = *matredundant;
2681   }
2682 
2683   /* insert local matrix entries */
2684   rptr = sbuf_j;
2685   cols = sbuf_j + rend-rstart + 1;
2686   vals = sbuf_a;
2687   for (i=0; i<rend-rstart; i++) {
2688     row   = i + rstart;
2689     ncols = rptr[i+1] - rptr[i];
2690     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2691     vals += ncols;
2692     cols += ncols;
2693   }
2694   /* insert received matrix entries */
2695   for (imdex=0; imdex<nrecvs; imdex++) {
2696     rstart = rowrange[recv_rank[imdex]];
2697     rend   = rowrange[recv_rank[imdex]+1];
2698     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2699     rptr   = rbuf_j[imdex];
2700     cols   = rbuf_j[imdex] + rend-rstart + 1;
2701     vals   = rbuf_a[imdex];
2702     for (i=0; i<rend-rstart; i++) {
2703       row   = i + rstart;
2704       ncols = rptr[i+1] - rptr[i];
2705       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2706       vals += ncols;
2707       cols += ncols;
2708     }
2709   }
2710   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2712 
2713   if (reuse == MAT_INITIAL_MATRIX) {
2714     *matredundant = C;
2715 
2716     /* create a supporting struct and attach it to C for reuse */
2717     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2718     if (subsize == 1) {
2719       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2720       c->redundant = redund;
2721     } else {
2722       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2723       c->redundant = redund;
2724     }
2725 
2726     redund->nzlocal   = nzlocal;
2727     redund->nsends    = nsends;
2728     redund->nrecvs    = nrecvs;
2729     redund->send_rank = send_rank;
2730     redund->recv_rank = recv_rank;
2731     redund->sbuf_nz   = sbuf_nz;
2732     redund->rbuf_nz   = rbuf_nz;
2733     redund->sbuf_j    = sbuf_j;
2734     redund->sbuf_a    = sbuf_a;
2735     redund->rbuf_j    = rbuf_j;
2736     redund->rbuf_a    = rbuf_a;
2737     redund->psubcomm  = NULL;
2738 
2739     redund->Destroy = C->ops->destroy;
2740     C->ops->destroy = MatDestroy_MatRedundant;
2741   }
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 #undef __FUNCT__
2746 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2747 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2748 {
2749   PetscErrorCode ierr;
2750   MPI_Comm       comm;
2751   PetscMPIInt    size,subsize;
2752   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2753   Mat_Redundant  *redund=NULL;
2754   PetscSubcomm   psubcomm=NULL;
2755   MPI_Comm       subcomm_in=subcomm;
2756   Mat            *matseq;
2757   IS             isrow,iscol;
2758 
2759   PetscFunctionBegin;
2760   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2761     if (reuse ==  MAT_INITIAL_MATRIX) {
2762       /* create psubcomm, then get subcomm */
2763       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2764       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2765       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2766 
2767       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2768       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2769       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2770       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2771       subcomm = psubcomm->comm;
2772     } else { /* retrieve psubcomm and subcomm */
2773       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2774       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2775       if (subsize == 1) {
2776         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2777         redund = c->redundant;
2778       } else {
2779         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2780         redund = c->redundant;
2781       }
2782       psubcomm = redund->psubcomm;
2783     }
2784     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2785       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2786       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2787         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2788         if (subsize == 1) {
2789           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2790           c->redundant->psubcomm = psubcomm;
2791         } else {
2792           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2793           c->redundant->psubcomm = psubcomm ;
2794         }
2795       }
2796       PetscFunctionReturn(0);
2797     }
2798   }
2799 
2800   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2801   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2802   if (reuse == MAT_INITIAL_MATRIX) {
2803     /* create a local sequential matrix matseq[0] */
2804     mloc_sub = PETSC_DECIDE;
2805     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2806     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2807     rstart = rend - mloc_sub;
2808     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2809     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2810   } else { /* reuse == MAT_REUSE_MATRIX */
2811     if (subsize == 1) {
2812       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2813       redund = c->redundant;
2814     } else {
2815       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2816       redund = c->redundant;
2817     }
2818 
2819     isrow  = redund->isrow;
2820     iscol  = redund->iscol;
2821     matseq = redund->matseq;
2822   }
2823   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2824   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2825 
2826   if (reuse == MAT_INITIAL_MATRIX) {
2827     /* create a supporting struct and attach it to C for reuse */
2828     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2829     if (subsize == 1) {
2830       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2831       c->redundant = redund;
2832     } else {
2833       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2834       c->redundant = redund;
2835     }
2836     redund->isrow    = isrow;
2837     redund->iscol    = iscol;
2838     redund->matseq   = matseq;
2839     redund->psubcomm = psubcomm;
2840     redund->Destroy               = (*matredundant)->ops->destroy;
2841     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2842   }
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2848 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2849 {
2850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2851   PetscErrorCode ierr;
2852   PetscInt       i,*idxb = 0;
2853   PetscScalar    *va,*vb;
2854   Vec            vtmp;
2855 
2856   PetscFunctionBegin;
2857   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2858   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2859   if (idx) {
2860     for (i=0; i<A->rmap->n; i++) {
2861       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2862     }
2863   }
2864 
2865   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2866   if (idx) {
2867     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2868   }
2869   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2870   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2871 
2872   for (i=0; i<A->rmap->n; i++) {
2873     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2874       va[i] = vb[i];
2875       if (idx) idx[i] = a->garray[idxb[i]];
2876     }
2877   }
2878 
2879   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2880   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2881   ierr = PetscFree(idxb);CHKERRQ(ierr);
2882   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 #undef __FUNCT__
2887 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2888 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2889 {
2890   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2891   PetscErrorCode ierr;
2892   PetscInt       i,*idxb = 0;
2893   PetscScalar    *va,*vb;
2894   Vec            vtmp;
2895 
2896   PetscFunctionBegin;
2897   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2898   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2899   if (idx) {
2900     for (i=0; i<A->cmap->n; i++) {
2901       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2902     }
2903   }
2904 
2905   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2906   if (idx) {
2907     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2908   }
2909   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2910   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2911 
2912   for (i=0; i<A->rmap->n; i++) {
2913     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2914       va[i] = vb[i];
2915       if (idx) idx[i] = a->garray[idxb[i]];
2916     }
2917   }
2918 
2919   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2920   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2921   ierr = PetscFree(idxb);CHKERRQ(ierr);
2922   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2928 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2929 {
2930   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2931   PetscInt       n      = A->rmap->n;
2932   PetscInt       cstart = A->cmap->rstart;
2933   PetscInt       *cmap  = mat->garray;
2934   PetscInt       *diagIdx, *offdiagIdx;
2935   Vec            diagV, offdiagV;
2936   PetscScalar    *a, *diagA, *offdiagA;
2937   PetscInt       r;
2938   PetscErrorCode ierr;
2939 
2940   PetscFunctionBegin;
2941   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2942   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2943   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2944   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2945   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2946   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2947   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2948   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2949   for (r = 0; r < n; ++r) {
2950     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2951       a[r]   = diagA[r];
2952       idx[r] = cstart + diagIdx[r];
2953     } else {
2954       a[r]   = offdiagA[r];
2955       idx[r] = cmap[offdiagIdx[r]];
2956     }
2957   }
2958   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2959   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2961   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2962   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2963   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2969 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2970 {
2971   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2972   PetscInt       n      = A->rmap->n;
2973   PetscInt       cstart = A->cmap->rstart;
2974   PetscInt       *cmap  = mat->garray;
2975   PetscInt       *diagIdx, *offdiagIdx;
2976   Vec            diagV, offdiagV;
2977   PetscScalar    *a, *diagA, *offdiagA;
2978   PetscInt       r;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2983   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2984   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2985   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2986   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2987   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2988   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2989   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2990   for (r = 0; r < n; ++r) {
2991     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2992       a[r]   = diagA[r];
2993       idx[r] = cstart + diagIdx[r];
2994     } else {
2995       a[r]   = offdiagA[r];
2996       idx[r] = cmap[offdiagIdx[r]];
2997     }
2998   }
2999   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3000   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3001   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3002   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3003   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3004   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3010 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3011 {
3012   PetscErrorCode ierr;
3013   Mat            *dummy;
3014 
3015   PetscFunctionBegin;
3016   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3017   *newmat = *dummy;
3018   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3019   PetscFunctionReturn(0);
3020 }
3021 
3022 #undef __FUNCT__
3023 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3024 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3025 {
3026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3027   PetscErrorCode ierr;
3028 
3029   PetscFunctionBegin;
3030   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3031   PetscFunctionReturn(0);
3032 }
3033 
3034 #undef __FUNCT__
3035 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3036 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3037 {
3038   PetscErrorCode ierr;
3039   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3040 
3041   PetscFunctionBegin;
3042   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3043   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3044   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3045   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 /* -------------------------------------------------------------------*/
3050 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3051                                        MatGetRow_MPIAIJ,
3052                                        MatRestoreRow_MPIAIJ,
3053                                        MatMult_MPIAIJ,
3054                                 /* 4*/ MatMultAdd_MPIAIJ,
3055                                        MatMultTranspose_MPIAIJ,
3056                                        MatMultTransposeAdd_MPIAIJ,
3057 #if defined(PETSC_HAVE_PBGL)
3058                                        MatSolve_MPIAIJ,
3059 #else
3060                                        0,
3061 #endif
3062                                        0,
3063                                        0,
3064                                 /*10*/ 0,
3065                                        0,
3066                                        0,
3067                                        MatSOR_MPIAIJ,
3068                                        MatTranspose_MPIAIJ,
3069                                 /*15*/ MatGetInfo_MPIAIJ,
3070                                        MatEqual_MPIAIJ,
3071                                        MatGetDiagonal_MPIAIJ,
3072                                        MatDiagonalScale_MPIAIJ,
3073                                        MatNorm_MPIAIJ,
3074                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3075                                        MatAssemblyEnd_MPIAIJ,
3076                                        MatSetOption_MPIAIJ,
3077                                        MatZeroEntries_MPIAIJ,
3078                                 /*24*/ MatZeroRows_MPIAIJ,
3079                                        0,
3080 #if defined(PETSC_HAVE_PBGL)
3081                                        0,
3082 #else
3083                                        0,
3084 #endif
3085                                        0,
3086                                        0,
3087                                 /*29*/ MatSetUp_MPIAIJ,
3088 #if defined(PETSC_HAVE_PBGL)
3089                                        0,
3090 #else
3091                                        0,
3092 #endif
3093                                        0,
3094                                        0,
3095                                        0,
3096                                 /*34*/ MatDuplicate_MPIAIJ,
3097                                        0,
3098                                        0,
3099                                        0,
3100                                        0,
3101                                 /*39*/ MatAXPY_MPIAIJ,
3102                                        MatGetSubMatrices_MPIAIJ,
3103                                        MatIncreaseOverlap_MPIAIJ,
3104                                        MatGetValues_MPIAIJ,
3105                                        MatCopy_MPIAIJ,
3106                                 /*44*/ MatGetRowMax_MPIAIJ,
3107                                        MatScale_MPIAIJ,
3108                                        0,
3109                                        0,
3110                                        MatZeroRowsColumns_MPIAIJ,
3111                                 /*49*/ MatSetRandom_MPIAIJ,
3112                                        0,
3113                                        0,
3114                                        0,
3115                                        0,
3116                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3117                                        0,
3118                                        MatSetUnfactored_MPIAIJ,
3119                                        MatPermute_MPIAIJ,
3120                                        0,
3121                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3122                                        MatDestroy_MPIAIJ,
3123                                        MatView_MPIAIJ,
3124                                        0,
3125                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3126                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3127                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3128                                        0,
3129                                        0,
3130                                        0,
3131                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3132                                        MatGetRowMinAbs_MPIAIJ,
3133                                        0,
3134                                        MatSetColoring_MPIAIJ,
3135                                        0,
3136                                        MatSetValuesAdifor_MPIAIJ,
3137                                 /*75*/ MatFDColoringApply_AIJ,
3138                                        0,
3139                                        0,
3140                                        0,
3141                                        MatFindZeroDiagonals_MPIAIJ,
3142                                 /*80*/ 0,
3143                                        0,
3144                                        0,
3145                                 /*83*/ MatLoad_MPIAIJ,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                        0,
3151                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3152                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3153                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3154                                        MatPtAP_MPIAIJ_MPIAIJ,
3155                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3156                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                        0,
3161                                 /*99*/ 0,
3162                                        0,
3163                                        0,
3164                                        MatConjugate_MPIAIJ,
3165                                        0,
3166                                 /*104*/MatSetValuesRow_MPIAIJ,
3167                                        MatRealPart_MPIAIJ,
3168                                        MatImaginaryPart_MPIAIJ,
3169                                        0,
3170                                        0,
3171                                 /*109*/0,
3172                                        MatGetRedundantMatrix_MPIAIJ,
3173                                        MatGetRowMin_MPIAIJ,
3174                                        0,
3175                                        0,
3176                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                        0,
3181                                 /*119*/0,
3182                                        0,
3183                                        0,
3184                                        0,
3185                                        MatGetMultiProcBlock_MPIAIJ,
3186                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3187                                        MatGetColumnNorms_MPIAIJ,
3188                                        MatInvertBlockDiagonal_MPIAIJ,
3189                                        0,
3190                                        MatGetSubMatricesParallel_MPIAIJ,
3191                                 /*129*/0,
3192                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3193                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3194                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3195                                        0,
3196                                 /*134*/0,
3197                                        0,
3198                                        0,
3199                                        0,
3200                                        0,
3201                                 /*139*/0,
3202                                        0,
3203                                        0,
3204                                        MatFDColoringSetUp_MPIXAIJ
3205 };
3206 
3207 /* ----------------------------------------------------------------------------------------*/
3208 
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3211 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3212 {
3213   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3214   PetscErrorCode ierr;
3215 
3216   PetscFunctionBegin;
3217   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3218   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3224 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3225 {
3226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3227   PetscErrorCode ierr;
3228 
3229   PetscFunctionBegin;
3230   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3231   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 #undef __FUNCT__
3236 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3237 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3238 {
3239   Mat_MPIAIJ     *b;
3240   PetscErrorCode ierr;
3241 
3242   PetscFunctionBegin;
3243   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3244   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3245   b = (Mat_MPIAIJ*)B->data;
3246 
3247   if (!B->preallocated) {
3248     /* Explicitly create 2 MATSEQAIJ matrices. */
3249     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3250     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3251     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3252     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3253     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3254     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3255     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3256     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3257     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3258     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3259   }
3260 
3261   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3262   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3263   B->preallocated = PETSC_TRUE;
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3269 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3270 {
3271   Mat            mat;
3272   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3273   PetscErrorCode ierr;
3274 
3275   PetscFunctionBegin;
3276   *newmat = 0;
3277   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3278   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3279   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3280   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3281   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3282   a       = (Mat_MPIAIJ*)mat->data;
3283 
3284   mat->factortype   = matin->factortype;
3285   mat->assembled    = PETSC_TRUE;
3286   mat->insertmode   = NOT_SET_VALUES;
3287   mat->preallocated = PETSC_TRUE;
3288 
3289   a->size         = oldmat->size;
3290   a->rank         = oldmat->rank;
3291   a->donotstash   = oldmat->donotstash;
3292   a->roworiented  = oldmat->roworiented;
3293   a->rowindices   = 0;
3294   a->rowvalues    = 0;
3295   a->getrowactive = PETSC_FALSE;
3296 
3297   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3298   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3299 
3300   if (oldmat->colmap) {
3301 #if defined(PETSC_USE_CTABLE)
3302     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3303 #else
3304     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3305     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3306     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3307 #endif
3308   } else a->colmap = 0;
3309   if (oldmat->garray) {
3310     PetscInt len;
3311     len  = oldmat->B->cmap->n;
3312     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3313     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3314     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3315   } else a->garray = 0;
3316 
3317   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3318   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3319   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3320   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3321   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3322   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3323   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3324   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3325   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3326   *newmat = mat;
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatLoad_MPIAIJ"
3334 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3335 {
3336   PetscScalar    *vals,*svals;
3337   MPI_Comm       comm;
3338   PetscErrorCode ierr;
3339   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3340   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3341   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3342   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3343   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3344   int            fd;
3345   PetscInt       bs = 1;
3346 
3347   PetscFunctionBegin;
3348   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3349   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3350   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3351   if (!rank) {
3352     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3353     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3354     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3355   }
3356 
3357   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3358   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3359   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3360 
3361   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3362 
3363   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3364   M    = header[1]; N = header[2];
3365   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3366   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3367   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3368 
3369   /* If global sizes are set, check if they are consistent with that given in the file */
3370   if (sizesset) {
3371     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3372   }
3373   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3374   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3375 
3376   /* determine ownership of all (block) rows */
3377   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3378   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3379   else m = newMat->rmap->n; /* Set by user */
3380 
3381   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3382   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3383 
3384   /* First process needs enough room for process with most rows */
3385   if (!rank) {
3386     mmax = rowners[1];
3387     for (i=2; i<=size; i++) {
3388       mmax = PetscMax(mmax, rowners[i]);
3389     }
3390   } else mmax = -1;             /* unused, but compilers complain */
3391 
3392   rowners[0] = 0;
3393   for (i=2; i<=size; i++) {
3394     rowners[i] += rowners[i-1];
3395   }
3396   rstart = rowners[rank];
3397   rend   = rowners[rank+1];
3398 
3399   /* distribute row lengths to all processors */
3400   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3401   if (!rank) {
3402     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3403     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3404     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3405     for (j=0; j<m; j++) {
3406       procsnz[0] += ourlens[j];
3407     }
3408     for (i=1; i<size; i++) {
3409       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3410       /* calculate the number of nonzeros on each processor */
3411       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3412         procsnz[i] += rowlengths[j];
3413       }
3414       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3415     }
3416     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3417   } else {
3418     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3419   }
3420 
3421   if (!rank) {
3422     /* determine max buffer needed and allocate it */
3423     maxnz = 0;
3424     for (i=0; i<size; i++) {
3425       maxnz = PetscMax(maxnz,procsnz[i]);
3426     }
3427     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix column indices  */
3430     nz   = procsnz[0];
3431     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3432     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3433 
3434     /* read in every one elses and ship off */
3435     for (i=1; i<size; i++) {
3436       nz   = procsnz[i];
3437       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3438       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3439     }
3440     ierr = PetscFree(cols);CHKERRQ(ierr);
3441   } else {
3442     /* determine buffer space needed for message */
3443     nz = 0;
3444     for (i=0; i<m; i++) {
3445       nz += ourlens[i];
3446     }
3447     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3448 
3449     /* receive message of column indices*/
3450     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3451   }
3452 
3453   /* determine column ownership if matrix is not square */
3454   if (N != M) {
3455     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3456     else n = newMat->cmap->n;
3457     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3458     cstart = cend - n;
3459   } else {
3460     cstart = rstart;
3461     cend   = rend;
3462     n      = cend - cstart;
3463   }
3464 
3465   /* loop over local rows, determining number of off diagonal entries */
3466   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3467   jj   = 0;
3468   for (i=0; i<m; i++) {
3469     for (j=0; j<ourlens[i]; j++) {
3470       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3471       jj++;
3472     }
3473   }
3474 
3475   for (i=0; i<m; i++) {
3476     ourlens[i] -= offlens[i];
3477   }
3478   if (!sizesset) {
3479     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3480   }
3481 
3482   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3483 
3484   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3485 
3486   for (i=0; i<m; i++) {
3487     ourlens[i] += offlens[i];
3488   }
3489 
3490   if (!rank) {
3491     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3492 
3493     /* read in my part of the matrix numerical values  */
3494     nz   = procsnz[0];
3495     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3496 
3497     /* insert into matrix */
3498     jj      = rstart;
3499     smycols = mycols;
3500     svals   = vals;
3501     for (i=0; i<m; i++) {
3502       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3503       smycols += ourlens[i];
3504       svals   += ourlens[i];
3505       jj++;
3506     }
3507 
3508     /* read in other processors and ship out */
3509     for (i=1; i<size; i++) {
3510       nz   = procsnz[i];
3511       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3512       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3513     }
3514     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3515   } else {
3516     /* receive numeric values */
3517     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3518 
3519     /* receive message of values*/
3520     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3521 
3522     /* insert into matrix */
3523     jj      = rstart;
3524     smycols = mycols;
3525     svals   = vals;
3526     for (i=0; i<m; i++) {
3527       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3528       smycols += ourlens[i];
3529       svals   += ourlens[i];
3530       jj++;
3531     }
3532   }
3533   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3534   ierr = PetscFree(vals);CHKERRQ(ierr);
3535   ierr = PetscFree(mycols);CHKERRQ(ierr);
3536   ierr = PetscFree(rowners);CHKERRQ(ierr);
3537   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3538   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3539   PetscFunctionReturn(0);
3540 }
3541 
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3544 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3545 {
3546   PetscErrorCode ierr;
3547   IS             iscol_local;
3548   PetscInt       csize;
3549 
3550   PetscFunctionBegin;
3551   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3552   if (call == MAT_REUSE_MATRIX) {
3553     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3554     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3555   } else {
3556     PetscInt cbs;
3557     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3558     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3559     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3560   }
3561   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3562   if (call == MAT_INITIAL_MATRIX) {
3563     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3564     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3565   }
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3570 #undef __FUNCT__
3571 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3572 /*
3573     Not great since it makes two copies of the submatrix, first an SeqAIJ
3574   in local and then by concatenating the local matrices the end result.
3575   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3576 
3577   Note: This requires a sequential iscol with all indices.
3578 */
3579 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3580 {
3581   PetscErrorCode ierr;
3582   PetscMPIInt    rank,size;
3583   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3584   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3585   PetscBool      allcolumns, colflag;
3586   Mat            M,Mreuse;
3587   MatScalar      *vwork,*aa;
3588   MPI_Comm       comm;
3589   Mat_SeqAIJ     *aij;
3590 
3591   PetscFunctionBegin;
3592   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3593   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3595 
3596   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3597   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3598   if (colflag && ncol == mat->cmap->N) {
3599     allcolumns = PETSC_TRUE;
3600   } else {
3601     allcolumns = PETSC_FALSE;
3602   }
3603   if (call ==  MAT_REUSE_MATRIX) {
3604     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3605     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3606     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3607   } else {
3608     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3609   }
3610 
3611   /*
3612       m - number of local rows
3613       n - number of columns (same on all processors)
3614       rstart - first row in new global matrix generated
3615   */
3616   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3617   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3618   if (call == MAT_INITIAL_MATRIX) {
3619     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3620     ii  = aij->i;
3621     jj  = aij->j;
3622 
3623     /*
3624         Determine the number of non-zeros in the diagonal and off-diagonal
3625         portions of the matrix in order to do correct preallocation
3626     */
3627 
3628     /* first get start and end of "diagonal" columns */
3629     if (csize == PETSC_DECIDE) {
3630       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3631       if (mglobal == n) { /* square matrix */
3632         nlocal = m;
3633       } else {
3634         nlocal = n/size + ((n % size) > rank);
3635       }
3636     } else {
3637       nlocal = csize;
3638     }
3639     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3640     rstart = rend - nlocal;
3641     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3642 
3643     /* next, compute all the lengths */
3644     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3645     olens = dlens + m;
3646     for (i=0; i<m; i++) {
3647       jend = ii[i+1] - ii[i];
3648       olen = 0;
3649       dlen = 0;
3650       for (j=0; j<jend; j++) {
3651         if (*jj < rstart || *jj >= rend) olen++;
3652         else dlen++;
3653         jj++;
3654       }
3655       olens[i] = olen;
3656       dlens[i] = dlen;
3657     }
3658     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3659     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3660     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3661     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3662     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3663     ierr = PetscFree(dlens);CHKERRQ(ierr);
3664   } else {
3665     PetscInt ml,nl;
3666 
3667     M    = *newmat;
3668     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3669     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3670     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3671     /*
3672          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3673        rather than the slower MatSetValues().
3674     */
3675     M->was_assembled = PETSC_TRUE;
3676     M->assembled     = PETSC_FALSE;
3677   }
3678   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3679   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3680   ii   = aij->i;
3681   jj   = aij->j;
3682   aa   = aij->a;
3683   for (i=0; i<m; i++) {
3684     row   = rstart + i;
3685     nz    = ii[i+1] - ii[i];
3686     cwork = jj;     jj += nz;
3687     vwork = aa;     aa += nz;
3688     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690 
3691   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   *newmat = M;
3694 
3695   /* save submatrix used in processor for next request */
3696   if (call ==  MAT_INITIAL_MATRIX) {
3697     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3698     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3699   }
3700   PetscFunctionReturn(0);
3701 }
3702 
3703 #undef __FUNCT__
3704 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3705 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3706 {
3707   PetscInt       m,cstart, cend,j,nnz,i,d;
3708   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3709   const PetscInt *JJ;
3710   PetscScalar    *values;
3711   PetscErrorCode ierr;
3712 
3713   PetscFunctionBegin;
3714   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3715 
3716   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3717   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3718   m      = B->rmap->n;
3719   cstart = B->cmap->rstart;
3720   cend   = B->cmap->rend;
3721   rstart = B->rmap->rstart;
3722 
3723   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3724 
3725 #if defined(PETSC_USE_DEBUGGING)
3726   for (i=0; i<m; i++) {
3727     nnz = Ii[i+1]- Ii[i];
3728     JJ  = J + Ii[i];
3729     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3730     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3731     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3732   }
3733 #endif
3734 
3735   for (i=0; i<m; i++) {
3736     nnz     = Ii[i+1]- Ii[i];
3737     JJ      = J + Ii[i];
3738     nnz_max = PetscMax(nnz_max,nnz);
3739     d       = 0;
3740     for (j=0; j<nnz; j++) {
3741       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3742     }
3743     d_nnz[i] = d;
3744     o_nnz[i] = nnz - d;
3745   }
3746   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3747   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3748 
3749   if (v) values = (PetscScalar*)v;
3750   else {
3751     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3752   }
3753 
3754   for (i=0; i<m; i++) {
3755     ii   = i + rstart;
3756     nnz  = Ii[i+1]- Ii[i];
3757     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3758   }
3759   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761 
3762   if (!v) {
3763     ierr = PetscFree(values);CHKERRQ(ierr);
3764   }
3765   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3771 /*@
3772    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3773    (the default parallel PETSc format).
3774 
3775    Collective on MPI_Comm
3776 
3777    Input Parameters:
3778 +  B - the matrix
3779 .  i - the indices into j for the start of each local row (starts with zero)
3780 .  j - the column indices for each local row (starts with zero)
3781 -  v - optional values in the matrix
3782 
3783    Level: developer
3784 
3785    Notes:
3786        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3787      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3788      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3789 
3790        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3791 
3792        The format which is used for the sparse matrix input, is equivalent to a
3793     row-major ordering.. i.e for the following matrix, the input data expected is
3794     as shown:
3795 
3796         1 0 0
3797         2 0 3     P0
3798        -------
3799         4 5 6     P1
3800 
3801      Process0 [P0]: rows_owned=[0,1]
3802         i =  {0,1,3}  [size = nrow+1  = 2+1]
3803         j =  {0,0,2}  [size = nz = 6]
3804         v =  {1,2,3}  [size = nz = 6]
3805 
3806      Process1 [P1]: rows_owned=[2]
3807         i =  {0,3}    [size = nrow+1  = 1+1]
3808         j =  {0,1,2}  [size = nz = 6]
3809         v =  {4,5,6}  [size = nz = 6]
3810 
3811 .keywords: matrix, aij, compressed row, sparse, parallel
3812 
3813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3814           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3815 @*/
3816 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3817 {
3818   PetscErrorCode ierr;
3819 
3820   PetscFunctionBegin;
3821   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3822   PetscFunctionReturn(0);
3823 }
3824 
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3827 /*@C
3828    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3829    (the default parallel PETSc format).  For good matrix assembly performance
3830    the user should preallocate the matrix storage by setting the parameters
3831    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3832    performance can be increased by more than a factor of 50.
3833 
3834    Collective on MPI_Comm
3835 
3836    Input Parameters:
3837 +  A - the matrix
3838 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3839            (same value is used for all local rows)
3840 .  d_nnz - array containing the number of nonzeros in the various rows of the
3841            DIAGONAL portion of the local submatrix (possibly different for each row)
3842            or NULL, if d_nz is used to specify the nonzero structure.
3843            The size of this array is equal to the number of local rows, i.e 'm'.
3844            For matrices that will be factored, you must leave room for (and set)
3845            the diagonal entry even if it is zero.
3846 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3847            submatrix (same value is used for all local rows).
3848 -  o_nnz - array containing the number of nonzeros in the various rows of the
3849            OFF-DIAGONAL portion of the local submatrix (possibly different for
3850            each row) or NULL, if o_nz is used to specify the nonzero
3851            structure. The size of this array is equal to the number
3852            of local rows, i.e 'm'.
3853 
3854    If the *_nnz parameter is given then the *_nz parameter is ignored
3855 
3856    The AIJ format (also called the Yale sparse matrix format or
3857    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3858    storage.  The stored row and column indices begin with zero.
3859    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3860 
3861    The parallel matrix is partitioned such that the first m0 rows belong to
3862    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3863    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3864 
3865    The DIAGONAL portion of the local submatrix of a processor can be defined
3866    as the submatrix which is obtained by extraction the part corresponding to
3867    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3868    first row that belongs to the processor, r2 is the last row belonging to
3869    the this processor, and c1-c2 is range of indices of the local part of a
3870    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3871    common case of a square matrix, the row and column ranges are the same and
3872    the DIAGONAL part is also square. The remaining portion of the local
3873    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3874 
3875    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3876 
3877    You can call MatGetInfo() to get information on how effective the preallocation was;
3878    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3879    You can also run with the option -info and look for messages with the string
3880    malloc in them to see if additional memory allocation was needed.
3881 
3882    Example usage:
3883 
3884    Consider the following 8x8 matrix with 34 non-zero values, that is
3885    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3886    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3887    as follows:
3888 
3889 .vb
3890             1  2  0  |  0  3  0  |  0  4
3891     Proc0   0  5  6  |  7  0  0  |  8  0
3892             9  0 10  | 11  0  0  | 12  0
3893     -------------------------------------
3894            13  0 14  | 15 16 17  |  0  0
3895     Proc1   0 18  0  | 19 20 21  |  0  0
3896             0  0  0  | 22 23  0  | 24  0
3897     -------------------------------------
3898     Proc2  25 26 27  |  0  0 28  | 29  0
3899            30  0  0  | 31 32 33  |  0 34
3900 .ve
3901 
3902    This can be represented as a collection of submatrices as:
3903 
3904 .vb
3905       A B C
3906       D E F
3907       G H I
3908 .ve
3909 
3910    Where the submatrices A,B,C are owned by proc0, D,E,F are
3911    owned by proc1, G,H,I are owned by proc2.
3912 
3913    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3915    The 'M','N' parameters are 8,8, and have the same values on all procs.
3916 
3917    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3918    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3919    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3920    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3921    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3922    matrix, ans [DF] as another SeqAIJ matrix.
3923 
3924    When d_nz, o_nz parameters are specified, d_nz storage elements are
3925    allocated for every row of the local diagonal submatrix, and o_nz
3926    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3927    One way to choose d_nz and o_nz is to use the max nonzerors per local
3928    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3929    In this case, the values of d_nz,o_nz are:
3930 .vb
3931      proc0 : dnz = 2, o_nz = 2
3932      proc1 : dnz = 3, o_nz = 2
3933      proc2 : dnz = 1, o_nz = 4
3934 .ve
3935    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3936    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3937    for proc3. i.e we are using 12+15+10=37 storage locations to store
3938    34 values.
3939 
3940    When d_nnz, o_nnz parameters are specified, the storage is specified
3941    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3942    In the above case the values for d_nnz,o_nnz are:
3943 .vb
3944      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3945      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3946      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3947 .ve
3948    Here the space allocated is sum of all the above values i.e 34, and
3949    hence pre-allocation is perfect.
3950 
3951    Level: intermediate
3952 
3953 .keywords: matrix, aij, compressed row, sparse, parallel
3954 
3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3956           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3957 @*/
3958 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3959 {
3960   PetscErrorCode ierr;
3961 
3962   PetscFunctionBegin;
3963   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3964   PetscValidType(B,1);
3965   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3971 /*@
3972      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3973          CSR format the local rows.
3974 
3975    Collective on MPI_Comm
3976 
3977    Input Parameters:
3978 +  comm - MPI communicator
3979 .  m - number of local rows (Cannot be PETSC_DECIDE)
3980 .  n - This value should be the same as the local size used in creating the
3981        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3982        calculated if N is given) For square matrices n is almost always m.
3983 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3984 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3985 .   i - row indices
3986 .   j - column indices
3987 -   a - matrix values
3988 
3989    Output Parameter:
3990 .   mat - the matrix
3991 
3992    Level: intermediate
3993 
3994    Notes:
3995        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3996      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3997      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3998 
3999        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4000 
4001        The format which is used for the sparse matrix input, is equivalent to a
4002     row-major ordering.. i.e for the following matrix, the input data expected is
4003     as shown:
4004 
4005         1 0 0
4006         2 0 3     P0
4007        -------
4008         4 5 6     P1
4009 
4010      Process0 [P0]: rows_owned=[0,1]
4011         i =  {0,1,3}  [size = nrow+1  = 2+1]
4012         j =  {0,0,2}  [size = nz = 6]
4013         v =  {1,2,3}  [size = nz = 6]
4014 
4015      Process1 [P1]: rows_owned=[2]
4016         i =  {0,3}    [size = nrow+1  = 1+1]
4017         j =  {0,1,2}  [size = nz = 6]
4018         v =  {4,5,6}  [size = nz = 6]
4019 
4020 .keywords: matrix, aij, compressed row, sparse, parallel
4021 
4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4023           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4024 @*/
4025 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4026 {
4027   PetscErrorCode ierr;
4028 
4029   PetscFunctionBegin;
4030   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4031   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4032   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4033   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4034   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4035   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4036   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4037   PetscFunctionReturn(0);
4038 }
4039 
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateAIJ"
4042 /*@C
4043    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4044    (the default parallel PETSc format).  For good matrix assembly performance
4045    the user should preallocate the matrix storage by setting the parameters
4046    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4047    performance can be increased by more than a factor of 50.
4048 
4049    Collective on MPI_Comm
4050 
4051    Input Parameters:
4052 +  comm - MPI communicator
4053 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4054            This value should be the same as the local size used in creating the
4055            y vector for the matrix-vector product y = Ax.
4056 .  n - This value should be the same as the local size used in creating the
4057        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4058        calculated if N is given) For square matrices n is almost always m.
4059 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4060 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4061 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4062            (same value is used for all local rows)
4063 .  d_nnz - array containing the number of nonzeros in the various rows of the
4064            DIAGONAL portion of the local submatrix (possibly different for each row)
4065            or NULL, if d_nz is used to specify the nonzero structure.
4066            The size of this array is equal to the number of local rows, i.e 'm'.
4067 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4068            submatrix (same value is used for all local rows).
4069 -  o_nnz - array containing the number of nonzeros in the various rows of the
4070            OFF-DIAGONAL portion of the local submatrix (possibly different for
4071            each row) or NULL, if o_nz is used to specify the nonzero
4072            structure. The size of this array is equal to the number
4073            of local rows, i.e 'm'.
4074 
4075    Output Parameter:
4076 .  A - the matrix
4077 
4078    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4079    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4080    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4081 
4082    Notes:
4083    If the *_nnz parameter is given then the *_nz parameter is ignored
4084 
4085    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4086    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4087    storage requirements for this matrix.
4088 
4089    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4090    processor than it must be used on all processors that share the object for
4091    that argument.
4092 
4093    The user MUST specify either the local or global matrix dimensions
4094    (possibly both).
4095 
4096    The parallel matrix is partitioned across processors such that the
4097    first m0 rows belong to process 0, the next m1 rows belong to
4098    process 1, the next m2 rows belong to process 2 etc.. where
4099    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4100    values corresponding to [m x N] submatrix.
4101 
4102    The columns are logically partitioned with the n0 columns belonging
4103    to 0th partition, the next n1 columns belonging to the next
4104    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4105 
4106    The DIAGONAL portion of the local submatrix on any given processor
4107    is the submatrix corresponding to the rows and columns m,n
4108    corresponding to the given processor. i.e diagonal matrix on
4109    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4110    etc. The remaining portion of the local submatrix [m x (N-n)]
4111    constitute the OFF-DIAGONAL portion. The example below better
4112    illustrates this concept.
4113 
4114    For a square global matrix we define each processor's diagonal portion
4115    to be its local rows and the corresponding columns (a square submatrix);
4116    each processor's off-diagonal portion encompasses the remainder of the
4117    local matrix (a rectangular submatrix).
4118 
4119    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4120 
4121    When calling this routine with a single process communicator, a matrix of
4122    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4123    type of communicator, use the construction mechanism:
4124      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4125 
4126    By default, this format uses inodes (identical nodes) when possible.
4127    We search for consecutive rows with the same nonzero structure, thereby
4128    reusing matrix information to achieve increased efficiency.
4129 
4130    Options Database Keys:
4131 +  -mat_no_inode  - Do not use inodes
4132 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4133 -  -mat_aij_oneindex - Internally use indexing starting at 1
4134         rather than 0.  Note that when calling MatSetValues(),
4135         the user still MUST index entries starting at 0!
4136 
4137 
4138    Example usage:
4139 
4140    Consider the following 8x8 matrix with 34 non-zero values, that is
4141    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4142    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4143    as follows:
4144 
4145 .vb
4146             1  2  0  |  0  3  0  |  0  4
4147     Proc0   0  5  6  |  7  0  0  |  8  0
4148             9  0 10  | 11  0  0  | 12  0
4149     -------------------------------------
4150            13  0 14  | 15 16 17  |  0  0
4151     Proc1   0 18  0  | 19 20 21  |  0  0
4152             0  0  0  | 22 23  0  | 24  0
4153     -------------------------------------
4154     Proc2  25 26 27  |  0  0 28  | 29  0
4155            30  0  0  | 31 32 33  |  0 34
4156 .ve
4157 
4158    This can be represented as a collection of submatrices as:
4159 
4160 .vb
4161       A B C
4162       D E F
4163       G H I
4164 .ve
4165 
4166    Where the submatrices A,B,C are owned by proc0, D,E,F are
4167    owned by proc1, G,H,I are owned by proc2.
4168 
4169    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4171    The 'M','N' parameters are 8,8, and have the same values on all procs.
4172 
4173    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4174    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4175    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4176    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4177    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4178    matrix, ans [DF] as another SeqAIJ matrix.
4179 
4180    When d_nz, o_nz parameters are specified, d_nz storage elements are
4181    allocated for every row of the local diagonal submatrix, and o_nz
4182    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4183    One way to choose d_nz and o_nz is to use the max nonzerors per local
4184    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4185    In this case, the values of d_nz,o_nz are:
4186 .vb
4187      proc0 : dnz = 2, o_nz = 2
4188      proc1 : dnz = 3, o_nz = 2
4189      proc2 : dnz = 1, o_nz = 4
4190 .ve
4191    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4192    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4193    for proc3. i.e we are using 12+15+10=37 storage locations to store
4194    34 values.
4195 
4196    When d_nnz, o_nnz parameters are specified, the storage is specified
4197    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4198    In the above case the values for d_nnz,o_nnz are:
4199 .vb
4200      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4201      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4202      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4203 .ve
4204    Here the space allocated is sum of all the above values i.e 34, and
4205    hence pre-allocation is perfect.
4206 
4207    Level: intermediate
4208 
4209 .keywords: matrix, aij, compressed row, sparse, parallel
4210 
4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4212           MPIAIJ, MatCreateMPIAIJWithArrays()
4213 @*/
4214 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4215 {
4216   PetscErrorCode ierr;
4217   PetscMPIInt    size;
4218 
4219   PetscFunctionBegin;
4220   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4221   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4222   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4223   if (size > 1) {
4224     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4225     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4226   } else {
4227     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4228     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4229   }
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 #undef __FUNCT__
4234 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4235 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4236 {
4237   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4238 
4239   PetscFunctionBegin;
4240   *Ad     = a->A;
4241   *Ao     = a->B;
4242   *colmap = a->garray;
4243   PetscFunctionReturn(0);
4244 }
4245 
4246 #undef __FUNCT__
4247 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4248 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4249 {
4250   PetscErrorCode ierr;
4251   PetscInt       i;
4252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4253 
4254   PetscFunctionBegin;
4255   if (coloring->ctype == IS_COLORING_GLOBAL) {
4256     ISColoringValue *allcolors,*colors;
4257     ISColoring      ocoloring;
4258 
4259     /* set coloring for diagonal portion */
4260     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4261 
4262     /* set coloring for off-diagonal portion */
4263     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4264     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4265     for (i=0; i<a->B->cmap->n; i++) {
4266       colors[i] = allcolors[a->garray[i]];
4267     }
4268     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4269     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4270     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4271     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4272   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4273     ISColoringValue *colors;
4274     PetscInt        *larray;
4275     ISColoring      ocoloring;
4276 
4277     /* set coloring for diagonal portion */
4278     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4279     for (i=0; i<a->A->cmap->n; i++) {
4280       larray[i] = i + A->cmap->rstart;
4281     }
4282     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4283     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4284     for (i=0; i<a->A->cmap->n; i++) {
4285       colors[i] = coloring->colors[larray[i]];
4286     }
4287     ierr = PetscFree(larray);CHKERRQ(ierr);
4288     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4289     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4290     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4291 
4292     /* set coloring for off-diagonal portion */
4293     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4294     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4295     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4296     for (i=0; i<a->B->cmap->n; i++) {
4297       colors[i] = coloring->colors[larray[i]];
4298     }
4299     ierr = PetscFree(larray);CHKERRQ(ierr);
4300     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4301     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4302     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4303   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 #undef __FUNCT__
4308 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4309 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4310 {
4311   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4312   PetscErrorCode ierr;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4316   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4317   PetscFunctionReturn(0);
4318 }
4319 
4320 #undef __FUNCT__
4321 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4322 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4323 {
4324   PetscErrorCode ierr;
4325   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4326   PetscInt       *indx;
4327 
4328   PetscFunctionBegin;
4329   /* This routine will ONLY return MPIAIJ type matrix */
4330   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4331   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4332   if (n == PETSC_DECIDE) {
4333     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4334   }
4335   /* Check sum(n) = N */
4336   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4337   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4338 
4339   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4340   rstart -= m;
4341 
4342   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4343   for (i=0; i<m; i++) {
4344     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4345     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4346     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4347   }
4348 
4349   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4350   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4351   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4352   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4353   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4354   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4360 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4361 {
4362   PetscErrorCode ierr;
4363   PetscInt       m,N,i,rstart,nnz,Ii;
4364   PetscInt       *indx;
4365   PetscScalar    *values;
4366 
4367   PetscFunctionBegin;
4368   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4369   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4370   for (i=0; i<m; i++) {
4371     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4372     Ii   = i + rstart;
4373     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4374     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4375   }
4376   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4383 /*@
4384       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4385                  matrices from each processor
4386 
4387     Collective on MPI_Comm
4388 
4389    Input Parameters:
4390 +    comm - the communicators the parallel matrix will live on
4391 .    inmat - the input sequential matrices
4392 .    n - number of local columns (or PETSC_DECIDE)
4393 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4394 
4395    Output Parameter:
4396 .    outmat - the parallel matrix generated
4397 
4398     Level: advanced
4399 
4400    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4401 
4402 @*/
4403 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4404 {
4405   PetscErrorCode ierr;
4406   PetscMPIInt    size;
4407 
4408   PetscFunctionBegin;
4409   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4410   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4411   if (size == 1) {
4412     if (scall == MAT_INITIAL_MATRIX) {
4413       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4414     } else {
4415       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4416     }
4417   } else {
4418     if (scall == MAT_INITIAL_MATRIX) {
4419       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4420     }
4421     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4422   }
4423   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatFileSplit"
4429 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4430 {
4431   PetscErrorCode    ierr;
4432   PetscMPIInt       rank;
4433   PetscInt          m,N,i,rstart,nnz;
4434   size_t            len;
4435   const PetscInt    *indx;
4436   PetscViewer       out;
4437   char              *name;
4438   Mat               B;
4439   const PetscScalar *values;
4440 
4441   PetscFunctionBegin;
4442   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4443   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4444   /* Should this be the type of the diagonal block of A? */
4445   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4446   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4447   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4448   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4449   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4450   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4451   for (i=0; i<m; i++) {
4452     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4453     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4454     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4455   }
4456   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4458 
4459   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4460   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4461   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4462   sprintf(name,"%s.%d",outfile,rank);
4463   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4464   ierr = PetscFree(name);CHKERRQ(ierr);
4465   ierr = MatView(B,out);CHKERRQ(ierr);
4466   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4467   ierr = MatDestroy(&B);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4472 #undef __FUNCT__
4473 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4474 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4475 {
4476   PetscErrorCode      ierr;
4477   Mat_Merge_SeqsToMPI *merge;
4478   PetscContainer      container;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4482   if (container) {
4483     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4496     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4497     ierr = PetscFree(merge);CHKERRQ(ierr);
4498     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4499   }
4500   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4501   PetscFunctionReturn(0);
4502 }
4503 
4504 #include <../src/mat/utils/freespace.h>
4505 #include <petscbt.h>
4506 
4507 #undef __FUNCT__
4508 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4509 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4510 {
4511   PetscErrorCode      ierr;
4512   MPI_Comm            comm;
4513   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4514   PetscMPIInt         size,rank,taga,*len_s;
4515   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4516   PetscInt            proc,m;
4517   PetscInt            **buf_ri,**buf_rj;
4518   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4519   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4520   MPI_Request         *s_waits,*r_waits;
4521   MPI_Status          *status;
4522   MatScalar           *aa=a->a;
4523   MatScalar           **abuf_r,*ba_i;
4524   Mat_Merge_SeqsToMPI *merge;
4525   PetscContainer      container;
4526 
4527   PetscFunctionBegin;
4528   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4529   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4530 
4531   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4532   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4533 
4534   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4535   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4536 
4537   bi     = merge->bi;
4538   bj     = merge->bj;
4539   buf_ri = merge->buf_ri;
4540   buf_rj = merge->buf_rj;
4541 
4542   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4543   owners = merge->rowmap->range;
4544   len_s  = merge->len_s;
4545 
4546   /* send and recv matrix values */
4547   /*-----------------------------*/
4548   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4549   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4550 
4551   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4552   for (proc=0,k=0; proc<size; proc++) {
4553     if (!len_s[proc]) continue;
4554     i    = owners[proc];
4555     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4556     k++;
4557   }
4558 
4559   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4560   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4561   ierr = PetscFree(status);CHKERRQ(ierr);
4562 
4563   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4564   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4565 
4566   /* insert mat values of mpimat */
4567   /*----------------------------*/
4568   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4569   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4570 
4571   for (k=0; k<merge->nrecv; k++) {
4572     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4573     nrows       = *(buf_ri_k[k]);
4574     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4575     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4576   }
4577 
4578   /* set values of ba */
4579   m = merge->rowmap->n;
4580   for (i=0; i<m; i++) {
4581     arow = owners[rank] + i;
4582     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4583     bnzi = bi[i+1] - bi[i];
4584     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4585 
4586     /* add local non-zero vals of this proc's seqmat into ba */
4587     anzi   = ai[arow+1] - ai[arow];
4588     aj     = a->j + ai[arow];
4589     aa     = a->a + ai[arow];
4590     nextaj = 0;
4591     for (j=0; nextaj<anzi; j++) {
4592       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4593         ba_i[j] += aa[nextaj++];
4594       }
4595     }
4596 
4597     /* add received vals into ba */
4598     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4599       /* i-th row */
4600       if (i == *nextrow[k]) {
4601         anzi   = *(nextai[k]+1) - *nextai[k];
4602         aj     = buf_rj[k] + *(nextai[k]);
4603         aa     = abuf_r[k] + *(nextai[k]);
4604         nextaj = 0;
4605         for (j=0; nextaj<anzi; j++) {
4606           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4607             ba_i[j] += aa[nextaj++];
4608           }
4609         }
4610         nextrow[k]++; nextai[k]++;
4611       }
4612     }
4613     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4614   }
4615   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4616   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617 
4618   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4619   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4620   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4621   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4622   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4623   PetscFunctionReturn(0);
4624 }
4625 
4626 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4627 
4628 #undef __FUNCT__
4629 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4630 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4631 {
4632   PetscErrorCode      ierr;
4633   Mat                 B_mpi;
4634   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4635   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4636   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4637   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4638   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4639   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4640   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4641   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4642   MPI_Status          *status;
4643   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4644   PetscBT             lnkbt;
4645   Mat_Merge_SeqsToMPI *merge;
4646   PetscContainer      container;
4647 
4648   PetscFunctionBegin;
4649   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4650 
4651   /* make sure it is a PETSc comm */
4652   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4654   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4655 
4656   ierr = PetscNew(&merge);CHKERRQ(ierr);
4657   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4658 
4659   /* determine row ownership */
4660   /*---------------------------------------------------------*/
4661   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4662   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4666   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4667   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4668 
4669   m      = merge->rowmap->n;
4670   owners = merge->rowmap->range;
4671 
4672   /* determine the number of messages to send, their lengths */
4673   /*---------------------------------------------------------*/
4674   len_s = merge->len_s;
4675 
4676   len          = 0; /* length of buf_si[] */
4677   merge->nsend = 0;
4678   for (proc=0; proc<size; proc++) {
4679     len_si[proc] = 0;
4680     if (proc == rank) {
4681       len_s[proc] = 0;
4682     } else {
4683       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4684       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4685     }
4686     if (len_s[proc]) {
4687       merge->nsend++;
4688       nrows = 0;
4689       for (i=owners[proc]; i<owners[proc+1]; i++) {
4690         if (ai[i+1] > ai[i]) nrows++;
4691       }
4692       len_si[proc] = 2*(nrows+1);
4693       len         += len_si[proc];
4694     }
4695   }
4696 
4697   /* determine the number and length of messages to receive for ij-structure */
4698   /*-------------------------------------------------------------------------*/
4699   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4700   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4701 
4702   /* post the Irecv of j-structure */
4703   /*-------------------------------*/
4704   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4705   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4706 
4707   /* post the Isend of j-structure */
4708   /*--------------------------------*/
4709   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4710 
4711   for (proc=0, k=0; proc<size; proc++) {
4712     if (!len_s[proc]) continue;
4713     i    = owners[proc];
4714     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4715     k++;
4716   }
4717 
4718   /* receives and sends of j-structure are complete */
4719   /*------------------------------------------------*/
4720   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4721   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4722 
4723   /* send and recv i-structure */
4724   /*---------------------------*/
4725   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4726   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4727 
4728   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4729   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4730   for (proc=0,k=0; proc<size; proc++) {
4731     if (!len_s[proc]) continue;
4732     /* form outgoing message for i-structure:
4733          buf_si[0]:                 nrows to be sent
4734                [1:nrows]:           row index (global)
4735                [nrows+1:2*nrows+1]: i-structure index
4736     */
4737     /*-------------------------------------------*/
4738     nrows       = len_si[proc]/2 - 1;
4739     buf_si_i    = buf_si + nrows+1;
4740     buf_si[0]   = nrows;
4741     buf_si_i[0] = 0;
4742     nrows       = 0;
4743     for (i=owners[proc]; i<owners[proc+1]; i++) {
4744       anzi = ai[i+1] - ai[i];
4745       if (anzi) {
4746         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4747         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4748         nrows++;
4749       }
4750     }
4751     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4752     k++;
4753     buf_si += len_si[proc];
4754   }
4755 
4756   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4757   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4758 
4759   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4760   for (i=0; i<merge->nrecv; i++) {
4761     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4762   }
4763 
4764   ierr = PetscFree(len_si);CHKERRQ(ierr);
4765   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4766   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4767   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4768   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4769   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4770   ierr = PetscFree(status);CHKERRQ(ierr);
4771 
4772   /* compute a local seq matrix in each processor */
4773   /*----------------------------------------------*/
4774   /* allocate bi array and free space for accumulating nonzero column info */
4775   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4776   bi[0] = 0;
4777 
4778   /* create and initialize a linked list */
4779   nlnk = N+1;
4780   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4781 
4782   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4783   len  = ai[owners[rank+1]] - ai[owners[rank]];
4784   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4785 
4786   current_space = free_space;
4787 
4788   /* determine symbolic info for each local row */
4789   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4790 
4791   for (k=0; k<merge->nrecv; k++) {
4792     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4793     nrows       = *buf_ri_k[k];
4794     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4795     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4796   }
4797 
4798   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4799   len  = 0;
4800   for (i=0; i<m; i++) {
4801     bnzi = 0;
4802     /* add local non-zero cols of this proc's seqmat into lnk */
4803     arow  = owners[rank] + i;
4804     anzi  = ai[arow+1] - ai[arow];
4805     aj    = a->j + ai[arow];
4806     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4807     bnzi += nlnk;
4808     /* add received col data into lnk */
4809     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4810       if (i == *nextrow[k]) { /* i-th row */
4811         anzi  = *(nextai[k]+1) - *nextai[k];
4812         aj    = buf_rj[k] + *nextai[k];
4813         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4814         bnzi += nlnk;
4815         nextrow[k]++; nextai[k]++;
4816       }
4817     }
4818     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4819 
4820     /* if free space is not available, make more free space */
4821     if (current_space->local_remaining<bnzi) {
4822       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4823       nspacedouble++;
4824     }
4825     /* copy data into free space, then initialize lnk */
4826     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4827     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4828 
4829     current_space->array           += bnzi;
4830     current_space->local_used      += bnzi;
4831     current_space->local_remaining -= bnzi;
4832 
4833     bi[i+1] = bi[i] + bnzi;
4834   }
4835 
4836   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4837 
4838   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4839   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4840   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4841 
4842   /* create symbolic parallel matrix B_mpi */
4843   /*---------------------------------------*/
4844   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4845   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4846   if (n==PETSC_DECIDE) {
4847     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4848   } else {
4849     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4850   }
4851   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4852   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4853   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4854   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4855   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4856 
4857   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4858   B_mpi->assembled    = PETSC_FALSE;
4859   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4860   merge->bi           = bi;
4861   merge->bj           = bj;
4862   merge->buf_ri       = buf_ri;
4863   merge->buf_rj       = buf_rj;
4864   merge->coi          = NULL;
4865   merge->coj          = NULL;
4866   merge->owners_co    = NULL;
4867 
4868   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4869 
4870   /* attach the supporting struct to B_mpi for reuse */
4871   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4872   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4873   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4874   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4875   *mpimat = B_mpi;
4876 
4877   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4878   PetscFunctionReturn(0);
4879 }
4880 
4881 #undef __FUNCT__
4882 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4883 /*@C
4884       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4885                  matrices from each processor
4886 
4887     Collective on MPI_Comm
4888 
4889    Input Parameters:
4890 +    comm - the communicators the parallel matrix will live on
4891 .    seqmat - the input sequential matrices
4892 .    m - number of local rows (or PETSC_DECIDE)
4893 .    n - number of local columns (or PETSC_DECIDE)
4894 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4895 
4896    Output Parameter:
4897 .    mpimat - the parallel matrix generated
4898 
4899     Level: advanced
4900 
4901    Notes:
4902      The dimensions of the sequential matrix in each processor MUST be the same.
4903      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4904      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4905 @*/
4906 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4907 {
4908   PetscErrorCode ierr;
4909   PetscMPIInt    size;
4910 
4911   PetscFunctionBegin;
4912   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4913   if (size == 1) {
4914     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915     if (scall == MAT_INITIAL_MATRIX) {
4916       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4917     } else {
4918       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4919     }
4920     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921     PetscFunctionReturn(0);
4922   }
4923   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924   if (scall == MAT_INITIAL_MATRIX) {
4925     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4926   }
4927   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4928   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 #undef __FUNCT__
4933 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4934 /*@
4935      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4936           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4937           with MatGetSize()
4938 
4939     Not Collective
4940 
4941    Input Parameters:
4942 +    A - the matrix
4943 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4944 
4945    Output Parameter:
4946 .    A_loc - the local sequential matrix generated
4947 
4948     Level: developer
4949 
4950 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4951 
4952 @*/
4953 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4954 {
4955   PetscErrorCode ierr;
4956   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4957   Mat_SeqAIJ     *mat,*a,*b;
4958   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4959   MatScalar      *aa,*ba,*cam;
4960   PetscScalar    *ca;
4961   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4962   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4963   PetscBool      match;
4964 
4965   PetscFunctionBegin;
4966   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4967   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4968   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4969   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4970   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4971   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4972   aa = a->a; ba = b->a;
4973   if (scall == MAT_INITIAL_MATRIX) {
4974     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4975     ci[0] = 0;
4976     for (i=0; i<am; i++) {
4977       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4978     }
4979     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4980     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4981     k    = 0;
4982     for (i=0; i<am; i++) {
4983       ncols_o = bi[i+1] - bi[i];
4984       ncols_d = ai[i+1] - ai[i];
4985       /* off-diagonal portion of A */
4986       for (jo=0; jo<ncols_o; jo++) {
4987         col = cmap[*bj];
4988         if (col >= cstart) break;
4989         cj[k]   = col; bj++;
4990         ca[k++] = *ba++;
4991       }
4992       /* diagonal portion of A */
4993       for (j=0; j<ncols_d; j++) {
4994         cj[k]   = cstart + *aj++;
4995         ca[k++] = *aa++;
4996       }
4997       /* off-diagonal portion of A */
4998       for (j=jo; j<ncols_o; j++) {
4999         cj[k]   = cmap[*bj++];
5000         ca[k++] = *ba++;
5001       }
5002     }
5003     /* put together the new matrix */
5004     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5005     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5006     /* Since these are PETSc arrays, change flags to free them as necessary. */
5007     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5008     mat->free_a  = PETSC_TRUE;
5009     mat->free_ij = PETSC_TRUE;
5010     mat->nonew   = 0;
5011   } else if (scall == MAT_REUSE_MATRIX) {
5012     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5013     ci = mat->i; cj = mat->j; cam = mat->a;
5014     for (i=0; i<am; i++) {
5015       /* off-diagonal portion of A */
5016       ncols_o = bi[i+1] - bi[i];
5017       for (jo=0; jo<ncols_o; jo++) {
5018         col = cmap[*bj];
5019         if (col >= cstart) break;
5020         *cam++ = *ba++; bj++;
5021       }
5022       /* diagonal portion of A */
5023       ncols_d = ai[i+1] - ai[i];
5024       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5025       /* off-diagonal portion of A */
5026       for (j=jo; j<ncols_o; j++) {
5027         *cam++ = *ba++; bj++;
5028       }
5029     }
5030   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5031   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5032   PetscFunctionReturn(0);
5033 }
5034 
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5037 /*@C
5038      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5039 
5040     Not Collective
5041 
5042    Input Parameters:
5043 +    A - the matrix
5044 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5045 -    row, col - index sets of rows and columns to extract (or NULL)
5046 
5047    Output Parameter:
5048 .    A_loc - the local sequential matrix generated
5049 
5050     Level: developer
5051 
5052 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5053 
5054 @*/
5055 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5056 {
5057   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5058   PetscErrorCode ierr;
5059   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5060   IS             isrowa,iscola;
5061   Mat            *aloc;
5062   PetscBool      match;
5063 
5064   PetscFunctionBegin;
5065   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5066   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5067   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5068   if (!row) {
5069     start = A->rmap->rstart; end = A->rmap->rend;
5070     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5071   } else {
5072     isrowa = *row;
5073   }
5074   if (!col) {
5075     start = A->cmap->rstart;
5076     cmap  = a->garray;
5077     nzA   = a->A->cmap->n;
5078     nzB   = a->B->cmap->n;
5079     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5080     ncols = 0;
5081     for (i=0; i<nzB; i++) {
5082       if (cmap[i] < start) idx[ncols++] = cmap[i];
5083       else break;
5084     }
5085     imark = i;
5086     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5087     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5088     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5089   } else {
5090     iscola = *col;
5091   }
5092   if (scall != MAT_INITIAL_MATRIX) {
5093     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5094     aloc[0] = *A_loc;
5095   }
5096   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5097   *A_loc = aloc[0];
5098   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5099   if (!row) {
5100     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5101   }
5102   if (!col) {
5103     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5104   }
5105   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5106   PetscFunctionReturn(0);
5107 }
5108 
5109 #undef __FUNCT__
5110 #define __FUNCT__ "MatGetBrowsOfAcols"
5111 /*@C
5112     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5113 
5114     Collective on Mat
5115 
5116    Input Parameters:
5117 +    A,B - the matrices in mpiaij format
5118 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5119 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5120 
5121    Output Parameter:
5122 +    rowb, colb - index sets of rows and columns of B to extract
5123 -    B_seq - the sequential matrix generated
5124 
5125     Level: developer
5126 
5127 @*/
5128 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5129 {
5130   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5131   PetscErrorCode ierr;
5132   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5133   IS             isrowb,iscolb;
5134   Mat            *bseq=NULL;
5135 
5136   PetscFunctionBegin;
5137   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5138     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5139   }
5140   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5141 
5142   if (scall == MAT_INITIAL_MATRIX) {
5143     start = A->cmap->rstart;
5144     cmap  = a->garray;
5145     nzA   = a->A->cmap->n;
5146     nzB   = a->B->cmap->n;
5147     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5148     ncols = 0;
5149     for (i=0; i<nzB; i++) {  /* row < local row index */
5150       if (cmap[i] < start) idx[ncols++] = cmap[i];
5151       else break;
5152     }
5153     imark = i;
5154     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5155     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5156     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5157     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5158   } else {
5159     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5160     isrowb  = *rowb; iscolb = *colb;
5161     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5162     bseq[0] = *B_seq;
5163   }
5164   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5165   *B_seq = bseq[0];
5166   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5167   if (!rowb) {
5168     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5169   } else {
5170     *rowb = isrowb;
5171   }
5172   if (!colb) {
5173     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5174   } else {
5175     *colb = iscolb;
5176   }
5177   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5178   PetscFunctionReturn(0);
5179 }
5180 
5181 #undef __FUNCT__
5182 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5183 /*
5184     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5185     of the OFF-DIAGONAL portion of local A
5186 
5187     Collective on Mat
5188 
5189    Input Parameters:
5190 +    A,B - the matrices in mpiaij format
5191 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5192 
5193    Output Parameter:
5194 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5195 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5196 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5197 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5198 
5199     Level: developer
5200 
5201 */
5202 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5203 {
5204   VecScatter_MPI_General *gen_to,*gen_from;
5205   PetscErrorCode         ierr;
5206   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5207   Mat_SeqAIJ             *b_oth;
5208   VecScatter             ctx =a->Mvctx;
5209   MPI_Comm               comm;
5210   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5211   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5212   PetscScalar            *rvalues,*svalues;
5213   MatScalar              *b_otha,*bufa,*bufA;
5214   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5215   MPI_Request            *rwaits = NULL,*swaits = NULL;
5216   MPI_Status             *sstatus,rstatus;
5217   PetscMPIInt            jj;
5218   PetscInt               *cols,sbs,rbs;
5219   PetscScalar            *vals;
5220 
5221   PetscFunctionBegin;
5222   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5223   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5224     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5225   }
5226   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5227   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5228 
5229   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5230   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5231   rvalues  = gen_from->values; /* holds the length of receiving row */
5232   svalues  = gen_to->values;   /* holds the length of sending row */
5233   nrecvs   = gen_from->n;
5234   nsends   = gen_to->n;
5235 
5236   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5237   srow    = gen_to->indices;    /* local row index to be sent */
5238   sstarts = gen_to->starts;
5239   sprocs  = gen_to->procs;
5240   sstatus = gen_to->sstatus;
5241   sbs     = gen_to->bs;
5242   rstarts = gen_from->starts;
5243   rprocs  = gen_from->procs;
5244   rbs     = gen_from->bs;
5245 
5246   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5247   if (scall == MAT_INITIAL_MATRIX) {
5248     /* i-array */
5249     /*---------*/
5250     /*  post receives */
5251     for (i=0; i<nrecvs; i++) {
5252       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5253       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5254       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5255     }
5256 
5257     /* pack the outgoing message */
5258     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5259 
5260     sstartsj[0] = 0;
5261     rstartsj[0] = 0;
5262     len         = 0; /* total length of j or a array to be sent */
5263     k           = 0;
5264     for (i=0; i<nsends; i++) {
5265       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5266       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5267       for (j=0; j<nrows; j++) {
5268         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5269         for (l=0; l<sbs; l++) {
5270           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5271 
5272           rowlen[j*sbs+l] = ncols;
5273 
5274           len += ncols;
5275           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5276         }
5277         k++;
5278       }
5279       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5280 
5281       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5282     }
5283     /* recvs and sends of i-array are completed */
5284     i = nrecvs;
5285     while (i--) {
5286       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5287     }
5288     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5289 
5290     /* allocate buffers for sending j and a arrays */
5291     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5292     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5293 
5294     /* create i-array of B_oth */
5295     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5296 
5297     b_othi[0] = 0;
5298     len       = 0; /* total length of j or a array to be received */
5299     k         = 0;
5300     for (i=0; i<nrecvs; i++) {
5301       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5302       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5303       for (j=0; j<nrows; j++) {
5304         b_othi[k+1] = b_othi[k] + rowlen[j];
5305         len        += rowlen[j]; k++;
5306       }
5307       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5308     }
5309 
5310     /* allocate space for j and a arrrays of B_oth */
5311     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5312     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5313 
5314     /* j-array */
5315     /*---------*/
5316     /*  post receives of j-array */
5317     for (i=0; i<nrecvs; i++) {
5318       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5319       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5320     }
5321 
5322     /* pack the outgoing message j-array */
5323     k = 0;
5324     for (i=0; i<nsends; i++) {
5325       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5326       bufJ  = bufj+sstartsj[i];
5327       for (j=0; j<nrows; j++) {
5328         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5329         for (ll=0; ll<sbs; ll++) {
5330           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5331           for (l=0; l<ncols; l++) {
5332             *bufJ++ = cols[l];
5333           }
5334           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5335         }
5336       }
5337       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5338     }
5339 
5340     /* recvs and sends of j-array are completed */
5341     i = nrecvs;
5342     while (i--) {
5343       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5344     }
5345     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5346   } else if (scall == MAT_REUSE_MATRIX) {
5347     sstartsj = *startsj_s;
5348     rstartsj = *startsj_r;
5349     bufa     = *bufa_ptr;
5350     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5351     b_otha   = b_oth->a;
5352   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5353 
5354   /* a-array */
5355   /*---------*/
5356   /*  post receives of a-array */
5357   for (i=0; i<nrecvs; i++) {
5358     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5359     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5360   }
5361 
5362   /* pack the outgoing message a-array */
5363   k = 0;
5364   for (i=0; i<nsends; i++) {
5365     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5366     bufA  = bufa+sstartsj[i];
5367     for (j=0; j<nrows; j++) {
5368       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5369       for (ll=0; ll<sbs; ll++) {
5370         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5371         for (l=0; l<ncols; l++) {
5372           *bufA++ = vals[l];
5373         }
5374         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5375       }
5376     }
5377     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5378   }
5379   /* recvs and sends of a-array are completed */
5380   i = nrecvs;
5381   while (i--) {
5382     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5383   }
5384   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5385   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5386 
5387   if (scall == MAT_INITIAL_MATRIX) {
5388     /* put together the new matrix */
5389     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5390 
5391     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5392     /* Since these are PETSc arrays, change flags to free them as necessary. */
5393     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5394     b_oth->free_a  = PETSC_TRUE;
5395     b_oth->free_ij = PETSC_TRUE;
5396     b_oth->nonew   = 0;
5397 
5398     ierr = PetscFree(bufj);CHKERRQ(ierr);
5399     if (!startsj_s || !bufa_ptr) {
5400       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5401       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5402     } else {
5403       *startsj_s = sstartsj;
5404       *startsj_r = rstartsj;
5405       *bufa_ptr  = bufa;
5406     }
5407   }
5408   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5409   PetscFunctionReturn(0);
5410 }
5411 
5412 #undef __FUNCT__
5413 #define __FUNCT__ "MatGetCommunicationStructs"
5414 /*@C
5415   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5416 
5417   Not Collective
5418 
5419   Input Parameters:
5420 . A - The matrix in mpiaij format
5421 
5422   Output Parameter:
5423 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5424 . colmap - A map from global column index to local index into lvec
5425 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5426 
5427   Level: developer
5428 
5429 @*/
5430 #if defined(PETSC_USE_CTABLE)
5431 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5432 #else
5433 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5434 #endif
5435 {
5436   Mat_MPIAIJ *a;
5437 
5438   PetscFunctionBegin;
5439   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5440   PetscValidPointer(lvec, 2);
5441   PetscValidPointer(colmap, 3);
5442   PetscValidPointer(multScatter, 4);
5443   a = (Mat_MPIAIJ*) A->data;
5444   if (lvec) *lvec = a->lvec;
5445   if (colmap) *colmap = a->colmap;
5446   if (multScatter) *multScatter = a->Mvctx;
5447   PetscFunctionReturn(0);
5448 }
5449 
5450 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5452 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5453 
5454 #undef __FUNCT__
5455 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5456 /*
5457     Computes (B'*A')' since computing B*A directly is untenable
5458 
5459                n                       p                          p
5460         (              )       (              )         (                  )
5461       m (      A       )  *  n (       B      )   =   m (         C        )
5462         (              )       (              )         (                  )
5463 
5464 */
5465 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5466 {
5467   PetscErrorCode ierr;
5468   Mat            At,Bt,Ct;
5469 
5470   PetscFunctionBegin;
5471   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5472   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5473   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5474   ierr = MatDestroy(&At);CHKERRQ(ierr);
5475   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5476   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5477   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5478   PetscFunctionReturn(0);
5479 }
5480 
5481 #undef __FUNCT__
5482 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5483 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5484 {
5485   PetscErrorCode ierr;
5486   PetscInt       m=A->rmap->n,n=B->cmap->n;
5487   Mat            Cmat;
5488 
5489   PetscFunctionBegin;
5490   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5491   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5492   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5493   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5494   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5495   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5496   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5498 
5499   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5500 
5501   *C = Cmat;
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /* ----------------------------------------------------------------*/
5506 #undef __FUNCT__
5507 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5508 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5509 {
5510   PetscErrorCode ierr;
5511 
5512   PetscFunctionBegin;
5513   if (scall == MAT_INITIAL_MATRIX) {
5514     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5515     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5516     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5517   }
5518   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5519   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5520   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5521   PetscFunctionReturn(0);
5522 }
5523 
5524 #if defined(PETSC_HAVE_MUMPS)
5525 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_PASTIX)
5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5529 #endif
5530 #if defined(PETSC_HAVE_SUPERLU_DIST)
5531 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5532 #endif
5533 #if defined(PETSC_HAVE_CLIQUE)
5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5535 #endif
5536 
5537 /*MC
5538    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5539 
5540    Options Database Keys:
5541 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5542 
5543   Level: beginner
5544 
5545 .seealso: MatCreateAIJ()
5546 M*/
5547 
5548 #undef __FUNCT__
5549 #define __FUNCT__ "MatCreate_MPIAIJ"
5550 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5551 {
5552   Mat_MPIAIJ     *b;
5553   PetscErrorCode ierr;
5554   PetscMPIInt    size;
5555 
5556   PetscFunctionBegin;
5557   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5558 
5559   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5560   B->data       = (void*)b;
5561   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5562   B->assembled  = PETSC_FALSE;
5563   B->insertmode = NOT_SET_VALUES;
5564   b->size       = size;
5565 
5566   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5567 
5568   /* build cache for off array entries formed */
5569   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5570 
5571   b->donotstash  = PETSC_FALSE;
5572   b->colmap      = 0;
5573   b->garray      = 0;
5574   b->roworiented = PETSC_TRUE;
5575 
5576   /* stuff used for matrix vector multiply */
5577   b->lvec  = NULL;
5578   b->Mvctx = NULL;
5579 
5580   /* stuff for MatGetRow() */
5581   b->rowindices   = 0;
5582   b->rowvalues    = 0;
5583   b->getrowactive = PETSC_FALSE;
5584 
5585   /* flexible pointer used in CUSP/CUSPARSE classes */
5586   b->spptr = NULL;
5587 
5588 #if defined(PETSC_HAVE_MUMPS)
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5590 #endif
5591 #if defined(PETSC_HAVE_PASTIX)
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5593 #endif
5594 #if defined(PETSC_HAVE_SUPERLU_DIST)
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5596 #endif
5597 #if defined(PETSC_HAVE_CLIQUE)
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5599 #endif
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5614   PetscFunctionReturn(0);
5615 }
5616 
5617 #undef __FUNCT__
5618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5619 /*@
5620      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5621          and "off-diagonal" part of the matrix in CSR format.
5622 
5623    Collective on MPI_Comm
5624 
5625    Input Parameters:
5626 +  comm - MPI communicator
5627 .  m - number of local rows (Cannot be PETSC_DECIDE)
5628 .  n - This value should be the same as the local size used in creating the
5629        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5630        calculated if N is given) For square matrices n is almost always m.
5631 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5632 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5633 .   i - row indices for "diagonal" portion of matrix
5634 .   j - column indices
5635 .   a - matrix values
5636 .   oi - row indices for "off-diagonal" portion of matrix
5637 .   oj - column indices
5638 -   oa - matrix values
5639 
5640    Output Parameter:
5641 .   mat - the matrix
5642 
5643    Level: advanced
5644 
5645    Notes:
5646        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5647        must free the arrays once the matrix has been destroyed and not before.
5648 
5649        The i and j indices are 0 based
5650 
5651        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5652 
5653        This sets local rows and cannot be used to set off-processor values.
5654 
5655        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5656        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5657        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5658        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5659        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5660        communication if it is known that only local entries will be set.
5661 
5662 .keywords: matrix, aij, compressed row, sparse, parallel
5663 
5664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5665           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5666 @*/
5667 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5668 {
5669   PetscErrorCode ierr;
5670   Mat_MPIAIJ     *maij;
5671 
5672   PetscFunctionBegin;
5673   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5674   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5675   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5676   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5677   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5678   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5679   maij = (Mat_MPIAIJ*) (*mat)->data;
5680 
5681   (*mat)->preallocated = PETSC_TRUE;
5682 
5683   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5684   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5685 
5686   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5687   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5688 
5689   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693 
5694   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5697   PetscFunctionReturn(0);
5698 }
5699 
5700 /*
5701     Special version for direct calls from Fortran
5702 */
5703 #include <petsc-private/fortranimpl.h>
5704 
5705 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5706 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5707 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5708 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5709 #endif
5710 
5711 /* Change these macros so can be used in void function */
5712 #undef CHKERRQ
5713 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5714 #undef SETERRQ2
5715 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5716 #undef SETERRQ3
5717 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5718 #undef SETERRQ
5719 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5720 
5721 #undef __FUNCT__
5722 #define __FUNCT__ "matsetvaluesmpiaij_"
5723 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5724 {
5725   Mat            mat  = *mmat;
5726   PetscInt       m    = *mm, n = *mn;
5727   InsertMode     addv = *maddv;
5728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5729   PetscScalar    value;
5730   PetscErrorCode ierr;
5731 
5732   MatCheckPreallocated(mat,1);
5733   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5734 
5735 #if defined(PETSC_USE_DEBUG)
5736   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5737 #endif
5738   {
5739     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5740     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5741     PetscBool roworiented = aij->roworiented;
5742 
5743     /* Some Variables required in the macro */
5744     Mat        A                 = aij->A;
5745     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5746     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5747     MatScalar  *aa               = a->a;
5748     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5749     Mat        B                 = aij->B;
5750     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5751     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5752     MatScalar  *ba               = b->a;
5753 
5754     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5755     PetscInt  nonew = a->nonew;
5756     MatScalar *ap1,*ap2;
5757 
5758     PetscFunctionBegin;
5759     for (i=0; i<m; i++) {
5760       if (im[i] < 0) continue;
5761 #if defined(PETSC_USE_DEBUG)
5762       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5763 #endif
5764       if (im[i] >= rstart && im[i] < rend) {
5765         row      = im[i] - rstart;
5766         lastcol1 = -1;
5767         rp1      = aj + ai[row];
5768         ap1      = aa + ai[row];
5769         rmax1    = aimax[row];
5770         nrow1    = ailen[row];
5771         low1     = 0;
5772         high1    = nrow1;
5773         lastcol2 = -1;
5774         rp2      = bj + bi[row];
5775         ap2      = ba + bi[row];
5776         rmax2    = bimax[row];
5777         nrow2    = bilen[row];
5778         low2     = 0;
5779         high2    = nrow2;
5780 
5781         for (j=0; j<n; j++) {
5782           if (roworiented) value = v[i*n+j];
5783           else value = v[i+j*m];
5784           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5785           if (in[j] >= cstart && in[j] < cend) {
5786             col = in[j] - cstart;
5787             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5788           } else if (in[j] < 0) continue;
5789 #if defined(PETSC_USE_DEBUG)
5790           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5791 #endif
5792           else {
5793             if (mat->was_assembled) {
5794               if (!aij->colmap) {
5795                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5796               }
5797 #if defined(PETSC_USE_CTABLE)
5798               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5799               col--;
5800 #else
5801               col = aij->colmap[in[j]] - 1;
5802 #endif
5803               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5804                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5805                 col  =  in[j];
5806                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5807                 B     = aij->B;
5808                 b     = (Mat_SeqAIJ*)B->data;
5809                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5810                 rp2   = bj + bi[row];
5811                 ap2   = ba + bi[row];
5812                 rmax2 = bimax[row];
5813                 nrow2 = bilen[row];
5814                 low2  = 0;
5815                 high2 = nrow2;
5816                 bm    = aij->B->rmap->n;
5817                 ba    = b->a;
5818               }
5819             } else col = in[j];
5820             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5821           }
5822         }
5823       } else if (!aij->donotstash) {
5824         if (roworiented) {
5825           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5826         } else {
5827           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5828         }
5829       }
5830     }
5831   }
5832   PetscFunctionReturnVoid();
5833 }
5834 
5835