xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7acc1648e4c2b2eb757ec59caa1d5e2be763971e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically
20    switches over to use inodes when enough rows with identical nonzero structure exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
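/*
   A minimal usage sketch of the recommendation above (illustrative only; the local size m and the
   nonzero estimates 5 and 2 are placeholder assumptions): calling both preallocation routines lets
   the same code run unchanged on one or many MPI processes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/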
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
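  /* work[] has global length n: each process accumulates the contribution of its local rows for
     every global column, and the per-column results are combined by MPI_Allreduce further below */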
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal nonzeros in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal nonzeros in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each processor
350 has an order N integer array) but is fast to access.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
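/*
   A sketch (illustration only, with hypothetical names gcol and lcol) of how the colmap built
   above is consulted to translate a global column index gcol into a local column index lcol of
   the off-diagonal block B.  The +1/-1 shifts exist because 0 is reserved to mean "not present",
   so lcol comes back as -1 when gcol is not a column of B.  The same lookup appears in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/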
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
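/*
   Both macros above follow the same insertion pattern: bisect the sorted row while the search
   window holds more than 5 entries, scan the remaining window linearly, and either update an
   existing entry or shift the tail of the row up to make room for the new one.  A standalone
   sketch of that pattern (illustration only, hypothetical names, no reallocation and no
   nonew/ignorezeroentries handling):

     static void InsertIntoSortedRow(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
     {
       PetscInt low = 0, high = *nrow, t, i;
       while (high - low > 5) {
         t = (low + high)/2;
         if (cols[t] > col) high = t;
         else               low  = t;
       }
       for (i = low; i < high; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += value;
           else                    vals[i]  = value;
           return;
         }
       }
       for (t = *nrow; t > i; t--) { cols[t] = cols[t-1]; vals[t] = vals[t-1]; }
       cols[i] = col; vals[i] = value; (*nrow)++;
     }
*/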
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
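  /* Each (row,col) entry is routed as follows: rows owned by this process go directly into the
     diagonal block aij->A (columns in [cstart,cend)) or into the off-diagonal block aij->B
     (all other columns, translated through colmap/garray once the matrix has been assembled);
     entries for rows owned by other processes are stashed and sent during assembly */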
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves so that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscMPIInt    size   = mat->size;
748   PetscSF        sf;
749   PetscInt      *lrows;
750   PetscSFNode   *rrows;
751   PetscInt       lastidx = -1, r, p = 0, len = 0;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   /* Create SF where leaves are input rows and roots are owned rows */
756   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
757   for (r = 0; r < n; ++r) lrows[r] = -1;
758   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
759   for (r = 0; r < N; ++r) {
760     const PetscInt idx   = rows[r];
761     PetscBool      found = PETSC_FALSE;
762     /* Trick for efficient searching for sorted rows */
763     if (lastidx > idx) p = 0;
764     lastidx = idx;
765     for (; p < size; ++p) {
766       if (idx >= owners[p] && idx < owners[p+1]) {
767         rrows[r].rank  = p;
768         rrows[r].index = rows[r] - owners[p];
769         found = PETSC_TRUE;
770         break;
771       }
772     }
773     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
774   }
775   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
776   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
777   /* Collect flags for rows to be zeroed */
778   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
779   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
780   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
781   /* Compress and put in row numbers */
782   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
783   /* fix right hand side if needed */
784   if (x && b) {
785     const PetscScalar *xx;
786     PetscScalar       *bb;
787 
788     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
789     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
790     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
791     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
793   }
794   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
795   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
796   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
823 {
824   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
825   PetscErrorCode    ierr;
826   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
827   PetscInt          i,j,r,m,p = 0,len = 0;
828   PetscInt          *lrows,*owners = A->rmap->range;
829   PetscSFNode       *rrows;
830   PetscSF           sf;
831   const PetscScalar *xx;
832   PetscScalar       *bb,*mask;
833   Vec               xmask,lmask;
834   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
835   const PetscInt    *aj, *ii,*ridx;
836   PetscScalar       *aa;
837 #if defined(PETSC_DEBUG)
838   PetscBool found = PETSC_FALSE;
839 #endif
840 
841   PetscFunctionBegin;
842   /* Create SF where leaves are input rows and roots are owned rows */
843   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
844   for (r = 0; r < n; ++r) lrows[r] = -1;
845   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
846   for (r = 0; r < N; ++r) {
847     const PetscInt idx   = rows[r];
848     PetscBool      found = PETSC_FALSE;
849     /* Trick for efficient searching for sorted rows */
850     if (lastidx > idx) p = 0;
851     lastidx = idx;
852     for (; p < size; ++p) {
853       if (idx >= owners[p] && idx < owners[p+1]) {
854         rrows[r].rank  = p;
855         rrows[r].index = rows[r] - owners[p];
856         found = PETSC_TRUE;
857         break;
858       }
859     }
860     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
861   }
862   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
863   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
864   /* Collect flags for rows to be zeroed */
865   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
866   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
867   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
868   /* Compress and put in row numbers */
869   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
870   /* zero diagonal part of matrix */
871   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
872   /* handle off diagonal part of matrix */
873   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
874   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
875   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
876   for (i=0; i<len; i++) bb[lrows[i]] = 1;
877   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
878   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
881   if (x) {
882     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
885     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
886   }
887   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
888   /* remove zeroed rows of off diagonal matrix */
889   ii = aij->i;
890   for (i=0; i<len; i++) {
891     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
892   }
893   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
894   if (aij->compressedrow.use) {
895     m    = aij->compressedrow.nrows;
896     ii   = aij->compressedrow.i;
897     ridx = aij->compressedrow.rindex;
898     for (i=0; i<m; i++) {
899       n  = ii[i+1] - ii[i];
900       aj = aij->j + ii[i];
901       aa = aij->a + ii[i];
902 
903       for (j=0; j<n; j++) {
904         if (PetscAbsScalar(mask[*aj])) {
905           if (b) bb[*ridx] -= *aa*xx[*aj];
906           *aa = 0.0;
907         }
908         aa++;
909         aj++;
910       }
911       ridx++;
912     }
913   } else { /* do not use compressed row format */
914     m = l->B->rmap->n;
915     for (i=0; i<m; i++) {
916       n  = ii[i+1] - ii[i];
917       aj = aij->j + ii[i];
918       aa = aij->a + ii[i];
919       for (j=0; j<n; j++) {
920         if (PetscAbsScalar(mask[*aj])) {
921           if (b) bb[i] -= *aa*xx[*aj];
922           *aa = 0.0;
923         }
924         aa++;
925         aj++;
926       }
927     }
928   }
929   if (x) {
930     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
931     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
932   }
933   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
934   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
935   ierr = PetscFree(lrows);CHKERRQ(ierr);
936 
937   /* only change matrix nonzero state if pattern was allowed to be changed */
938   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
939     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
940     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_MPIAIJ"
947 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
948 {
949   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
950   PetscErrorCode ierr;
951   PetscInt       nt;
952 
953   PetscFunctionBegin;
954   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
955   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
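  /* yy = A*xx + B*lvec: start the scatter that gathers the needed ghost values of xx into lvec,
     overlap it with the diagonal-block multiply, complete the scatter, then add in the
     off-diagonal-block contribution */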
956   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
957   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
958   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
965 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
966 {
967   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
968   PetscErrorCode ierr;
969 
970   PetscFunctionBegin;
971   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMultAdd_MPIAIJ"
977 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
985   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
992 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
995   PetscErrorCode ierr;
996   PetscBool      merged;
997 
998   PetscFunctionBegin;
999   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1000   /* do nondiagonal part */
1001   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1002   if (!merged) {
1003     /* send it on its way */
1004     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1005     /* do local part */
1006     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007     /* receive remote parts: note this assumes the values are not actually */
1008     /* added into yy until the next line */
1009     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   } else {
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* send it on its way */
1014     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1015     /* values actually were received in the Begin() but we need to call this nop */
1016     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1023 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1024 {
1025   MPI_Comm       comm;
1026   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1027   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1028   IS             Me,Notme;
1029   PetscErrorCode ierr;
1030   PetscInt       M,N,first,last,*notme,i;
1031   PetscMPIInt    size;
1032 
1033   PetscFunctionBegin;
1034   /* Easy test: symmetric diagonal block */
1035   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1036   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1037   if (!*f) PetscFunctionReturn(0);
1038   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1040   if (size == 1) PetscFunctionReturn(0);
1041 
1042   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1043   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1044   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1045   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1046   for (i=0; i<first; i++) notme[i] = i;
1047   for (i=last; i<M; i++) notme[i-last+first] = i;
1048   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1049   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1050   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1051   Aoff = Aoffs[0];
1052   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1053   Boff = Boffs[0];
1054   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1055   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1056   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1057   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1058   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1059   ierr = PetscFree(notme);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1065 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1066 {
1067   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   /* do nondiagonal part */
1072   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1073   /* send it on its way */
1074   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075   /* do local part */
1076   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1077   /* receive remote parts */
1078   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1088 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1089 {
1090   PetscErrorCode ierr;
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092 
1093   PetscFunctionBegin;
1094   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1095   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1096   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatScale_MPIAIJ"
1102 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1109   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatDestroy_MatRedundant"
1115 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1116 {
1117   PetscErrorCode ierr;
1118   Mat_Redundant  *redund;
1119   PetscInt       i;
1120   PetscMPIInt    size;
1121 
1122   PetscFunctionBegin;
1123   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1124   if (size == 1) {
1125     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1126     redund = a->redundant;
1127   } else {
1128     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1129     redund = a->redundant;
1130   }
1131   if (redund){
1132     if (redund->matseq) { /* via MatGetSubMatrices()  */
1133       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1134       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1135       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1136       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1137     } else {
1138       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1139       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1140       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1141       for (i=0; i<redund->nrecvs; i++) {
1142         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1143         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1144       }
1145       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1146     }
1147 
1148     if (redund->psubcomm) {
1149       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1150     }
1151     ierr = PetscFree(redund);CHKERRQ(ierr);
1152   }
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatDestroy_MPIAIJ"
1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1159 {
1160   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164 #if defined(PETSC_USE_LOG)
1165   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1166 #endif
1167   ierr = MatDestroy_MatRedundant(mat);CHKERRQ(ierr);
1168   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1169   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1170   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1171   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1172 #if defined(PETSC_USE_CTABLE)
1173   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1174 #else
1175   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1176 #endif
1177   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1178   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1179   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1180   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1181   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1182   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1183 
1184   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1198 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1199 {
1200   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1201   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1202   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1203   PetscErrorCode ierr;
1204   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1205   int            fd;
1206   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1207   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1208   PetscScalar    *column_values;
1209   PetscInt       message_count,flowcontrolcount;
1210   FILE           *file;
1211 
1212   PetscFunctionBegin;
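  /* Process 0 writes the binary file; the row lengths, column indices, and numerical values of
     the other processes are received and written in rank order, throttled by the viewer's
     flow control */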
1213   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1214   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1215   nz   = A->nz + B->nz;
1216   if (!rank) {
1217     header[0] = MAT_FILE_CLASSID;
1218     header[1] = mat->rmap->N;
1219     header[2] = mat->cmap->N;
1220 
1221     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1223     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1224     /* get largest number of rows any processor has */
1225     rlen  = mat->rmap->n;
1226     range = mat->rmap->range;
1227     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1228   } else {
1229     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     rlen = mat->rmap->n;
1231   }
1232 
1233   /* load up the local row counts */
1234   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1235   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1236 
1237   /* store the row lengths to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1241     for (i=1; i<size; i++) {
1242       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1243       rlen = range[i+1] - range[i];
1244       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1250     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1254 
1255   /* load up the local column indices */
1256   nzmax = nz; /* process 0 needs as much buffer space as the process with the most nonzeros */
1257   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1258   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1259   cnt   = 0;
1260   for (i=0; i<mat->rmap->n; i++) {
1261     for (j=B->i[i]; j<B->i[i+1]; j++) {
1262       if ((col = garray[B->j[j]]) > cstart) break;
1263       column_indices[cnt++] = col;
1264     }
1265     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1266     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1267   }
1268   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1269 
1270   /* store the column indices to the file */
1271   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1272   if (!rank) {
1273     MPI_Status status;
1274     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1275     for (i=1; i<size; i++) {
1276       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1277       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1278       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1279       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     }
1282     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1283   } else {
1284     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1285     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1286     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1288   }
1289   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1290 
1291   /* load up the local column values */
1292   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1293   cnt  = 0;
1294   for (i=0; i<mat->rmap->n; i++) {
1295     for (j=B->i[i]; j<B->i[i+1]; j++) {
1296       if (garray[B->j[j]] > cstart) break;
1297       column_values[cnt++] = B->a[j];
1298     }
1299     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1300     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1301   }
1302   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1303 
1304   /* store the column values to the file */
1305   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1306   if (!rank) {
1307     MPI_Status status;
1308     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1309     for (i=1; i<size; i++) {
1310       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1311       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1312       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1313       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1315     }
1316     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1317   } else {
1318     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1319     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1322   }
1323   ierr = PetscFree(column_values);CHKERRQ(ierr);
1324 
1325   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1326   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 #include <petscdraw.h>
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
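/*
   Viewer dispatch for MATMPIAIJ: the ASCII info formats print per-process nonzero
   and inode statistics, the binary viewer calls MatView_MPIAIJ_Binary() (or views
   aij->A directly when run on one process), and for the remaining ASCII, draw,
   and socket cases the whole matrix is gathered onto the first process and viewed
   there, so those paths are only practical for small matrices.  User code reaches
   this routine through the public interface, e.g. (illustrative):

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/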
1333 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1334 {
1335   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1336   PetscErrorCode    ierr;
1337   PetscMPIInt       rank = aij->rank,size = aij->size;
1338   PetscBool         isdraw,iascii,isbinary;
1339   PetscViewer       sviewer;
1340   PetscViewerFormat format;
1341 
1342   PetscFunctionBegin;
1343   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1344   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1345   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1346   if (iascii) {
1347     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1348     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1349       MatInfo   info;
1350       PetscBool inodes;
1351 
1352       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1353       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1354       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1356       if (!inodes) {
1357         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1358                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1359       } else {
1360         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1361                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1362       }
1363       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1365       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1366       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1367       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1368       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1370       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1371       PetscFunctionReturn(0);
1372     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1373       PetscInt inodecount,inodelimit,*inodes;
1374       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1375       if (inodes) {
1376         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1377       } else {
1378         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1379       }
1380       PetscFunctionReturn(0);
1381     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1382       PetscFunctionReturn(0);
1383     }
1384   } else if (isbinary) {
1385     if (size == 1) {
1386       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1387       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1388     } else {
1389       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1390     }
1391     PetscFunctionReturn(0);
1392   } else if (isdraw) {
1393     PetscDraw draw;
1394     PetscBool isnull;
1395     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1396     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1397   }
1398 
1399   {
1400     /* assemble the entire matrix onto first processor. */
1401     Mat        A;
1402     Mat_SeqAIJ *Aloc;
1403     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1404     MatScalar  *a;
1405 
1406     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1407     if (!rank) {
1408       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1409     } else {
1410       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1411     }
1412     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1413     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1414     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1415     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1416     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1417 
1418     /* copy over the A part */
1419     Aloc = (Mat_SeqAIJ*)aij->A->data;
1420     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1421     row  = mat->rmap->rstart;
1422     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1423     for (i=0; i<m; i++) {
1424       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1425       row++;
1426       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1427     }
1428     aj = Aloc->j;
1429     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1430 
1431     /* copy over the B part */
1432     Aloc = (Mat_SeqAIJ*)aij->B->data;
1433     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1434     row  = mat->rmap->rstart;
1435     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1436     ct   = cols;
1437     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1438     for (i=0; i<m; i++) {
1439       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1440       row++;
1441       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1442     }
1443     ierr = PetscFree(ct);CHKERRQ(ierr);
1444     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1445     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446     /*
1447        Everyone has to call to draw the matrix since the graphics waits are
1448        synchronized across all processors that share the PetscDraw object
1449     */
1450     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1451     if (!rank) {
1452       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1453     }
1454     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1455     ierr = MatDestroy(&A);CHKERRQ(ierr);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatView_MPIAIJ"
1462 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1463 {
1464   PetscErrorCode ierr;
1465   PetscBool      iascii,isdraw,issocket,isbinary;
1466 
1467   PetscFunctionBegin;
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1472   if (iascii || isdraw || isbinary || issocket) {
1473     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatSOR_MPIAIJ"
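/*
   Only the processor-local SOR variants and the Eisenstat trick are implemented;
   true parallel SOR generates an error.  Each outer iteration scatters the current
   solution into mat->lvec, forms bb1 = bb - B*x using the off-diagonal block B,
   and applies the sequential SOR kernel of the diagonal block A to bb1.  E.g.
   (illustrative call through the public interface):

       ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,its,lits,x);CHKERRQ(ierr);
*/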
1480 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1481 {
1482   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1483   PetscErrorCode ierr;
1484   Vec            bb1 = 0;
1485   PetscBool      hasop;
1486 
1487   PetscFunctionBegin;
1488   if (flag == SOR_APPLY_UPPER) {
1489     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490     PetscFunctionReturn(0);
1491   }
1492 
1493   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1494     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1495   }
1496 
1497   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1498     if (flag & SOR_ZERO_INITIAL_GUESS) {
1499       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1500       its--;
1501     }
1502 
1503     while (its--) {
1504       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506 
1507       /* update rhs: bb1 = bb - B*x */
1508       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1509       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1510 
1511       /* local sweep */
1512       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1513     }
1514   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1515     if (flag & SOR_ZERO_INITIAL_GUESS) {
1516       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1517       its--;
1518     }
1519     while (its--) {
1520       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522 
1523       /* update rhs: bb1 = bb - B*x */
1524       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1525       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1526 
1527       /* local sweep */
1528       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1529     }
1530   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1531     if (flag & SOR_ZERO_INITIAL_GUESS) {
1532       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1533       its--;
1534     }
1535     while (its--) {
1536       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538 
1539       /* update rhs: bb1 = bb - B*x */
1540       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1541       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1542 
1543       /* local sweep */
1544       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1545     }
1546   } else if (flag & SOR_EISENSTAT) {
1547     Vec xx1;
1548 
1549     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1550     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1551 
1552     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554     if (!mat->diag) {
1555       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1556       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1557     }
1558     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1559     if (hasop) {
1560       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1561     } else {
1562       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1563     }
1564     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1565 
1566     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1567 
1568     /* local sweep */
1569     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1570     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1571     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1572   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1573 
1574   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatPermute_MPIAIJ"
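/*
   Permutes the rows and columns of a parallel AIJ matrix.  The row and column
   permutations are inverted with PetscSF reductions to find where each local row
   and column lands, the new diagonal/off-diagonal nonzero counts are broadcast to
   the destination rows for preallocation, and the entries are inserted with
   MatSetValues().  Reached through the public interface as, e.g. (illustrative):

       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/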
1580 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1581 {
1582   Mat            aA,aB,Aperm;
1583   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1584   PetscScalar    *aa,*ba;
1585   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1586   PetscSF        rowsf,sf;
1587   IS             parcolp = NULL;
1588   PetscBool      done;
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1593   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1594   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1595   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1596 
1597   /* Invert row permutation to find out where my rows should go */
1598   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1599   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1600   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1601   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1602   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1604 
1605   /* Invert column permutation to find out where my columns should go */
1606   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1607   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1608   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1609   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1610   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1611   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1613 
1614   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1615   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1616   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1617 
1618   /* Find out where my gcols should go */
1619   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1620   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1621   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1622   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1623   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1624   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1625   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1626   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1627 
1628   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1629   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1630   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1631   for (i=0; i<m; i++) {
1632     PetscInt row = rdest[i],rowner;
1633     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1634     for (j=ai[i]; j<ai[i+1]; j++) {
1635       PetscInt cowner,col = cdest[aj[j]];
1636       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1637       if (rowner == cowner) dnnz[i]++;
1638       else onnz[i]++;
1639     }
1640     for (j=bi[i]; j<bi[i+1]; j++) {
1641       PetscInt cowner,col = gcdest[bj[j]];
1642       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1643       if (rowner == cowner) dnnz[i]++;
1644       else onnz[i]++;
1645     }
1646   }
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1649   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1650   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1651   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1652 
1653   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1654   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1655   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1656   for (i=0; i<m; i++) {
1657     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1658     PetscInt j0,rowlen;
1659     rowlen = ai[i+1] - ai[i];
1660     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1661       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1662       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1663     }
1664     rowlen = bi[i+1] - bi[i];
1665     for (j0=j=0; j<rowlen; j0=j) {
1666       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1667       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1668     }
1669   }
1670   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1673   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1674   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1675   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1676   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1677   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1678   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1679   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1680   *B = Aperm;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatGetInfo_MPIAIJ"
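/*
   Adds the statistics of the diagonal (A) and off-diagonal (B) blocks; for
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the local totals are combined across the
   communicator with MPI_Allreduce().  Fill ratios are reported as zero because
   no parallel LU/ILU/Cholesky is provided here.
*/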
1686 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1687 {
1688   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1689   Mat            A    = mat->A,B = mat->B;
1690   PetscErrorCode ierr;
1691   PetscReal      isend[5],irecv[5];
1692 
1693   PetscFunctionBegin;
1694   info->block_size = 1.0;
1695   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1696 
1697   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1698   isend[3] = info->memory;  isend[4] = info->mallocs;
1699 
1700   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1701 
1702   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1703   isend[3] += info->memory;  isend[4] += info->mallocs;
1704   if (flag == MAT_LOCAL) {
1705     info->nz_used      = isend[0];
1706     info->nz_allocated = isend[1];
1707     info->nz_unneeded  = isend[2];
1708     info->memory       = isend[3];
1709     info->mallocs      = isend[4];
1710   } else if (flag == MAT_GLOBAL_MAX) {
1711     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1712 
1713     info->nz_used      = irecv[0];
1714     info->nz_allocated = irecv[1];
1715     info->nz_unneeded  = irecv[2];
1716     info->memory       = irecv[3];
1717     info->mallocs      = irecv[4];
1718   } else if (flag == MAT_GLOBAL_SUM) {
1719     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1720 
1721     info->nz_used      = irecv[0];
1722     info->nz_allocated = irecv[1];
1723     info->nz_unneeded  = irecv[2];
1724     info->memory       = irecv[3];
1725     info->mallocs      = irecv[4];
1726   }
1727   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1728   info->fill_ratio_needed = 0;
1729   info->factor_mallocs    = 0;
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatSetOption_MPIAIJ"
1735 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1736 {
1737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   switch (op) {
1742   case MAT_NEW_NONZERO_LOCATIONS:
1743   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1744   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1745   case MAT_KEEP_NONZERO_PATTERN:
1746   case MAT_NEW_NONZERO_LOCATION_ERR:
1747   case MAT_USE_INODES:
1748   case MAT_IGNORE_ZERO_ENTRIES:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_ROW_ORIENTED:
1754     a->roworiented = flg;
1755 
1756     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1757     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_NEW_DIAGONALS:
1760     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1761     break;
1762   case MAT_IGNORE_OFF_PROC_ENTRIES:
1763     a->donotstash = flg;
1764     break;
1765   case MAT_SPD:
1766     A->spd_set = PETSC_TRUE;
1767     A->spd     = flg;
1768     if (flg) {
1769       A->symmetric                  = PETSC_TRUE;
1770       A->structurally_symmetric     = PETSC_TRUE;
1771       A->symmetric_set              = PETSC_TRUE;
1772       A->structurally_symmetric_set = PETSC_TRUE;
1773     }
1774     break;
1775   case MAT_SYMMETRIC:
1776     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1777     break;
1778   case MAT_STRUCTURALLY_SYMMETRIC:
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_HERMITIAN:
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   case MAT_SYMMETRY_ETERNAL:
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   default:
1788     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatGetRow_MPIAIJ"
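/*
   Returns one locally owned row with global column indices, merging the diagonal
   (A) and off-diagonal (B) parts so the columns come out in increasing global
   order; work space sized by the longest local row is allocated on first use.
   Callers use the public pair, e.g. (illustrative):

       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/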
1795 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1796 {
1797   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1798   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1799   PetscErrorCode ierr;
1800   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1801   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1802   PetscInt       *cmap,*idx_p;
1803 
1804   PetscFunctionBegin;
1805   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1806   mat->getrowactive = PETSC_TRUE;
1807 
1808   if (!mat->rowvalues && (idx || v)) {
1809     /*
1810         allocate enough space to hold information from the longest row.
1811     */
1812     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1813     PetscInt   max = 1,tmp;
1814     for (i=0; i<matin->rmap->n; i++) {
1815       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1816       if (max < tmp) max = tmp;
1817     }
1818     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1819   }
1820 
1821   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1822   lrow = row - rstart;
1823 
1824   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1825   if (!v)   {pvA = 0; pvB = 0;}
1826   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1827   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1828   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1829   nztot = nzA + nzB;
1830 
1831   cmap = mat->garray;
1832   if (v  || idx) {
1833     if (nztot) {
1834       /* Sort by increasing column numbers, assuming A and B already sorted */
1835       PetscInt imark = -1;
1836       if (v) {
1837         *v = v_p = mat->rowvalues;
1838         for (i=0; i<nzB; i++) {
1839           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1840           else break;
1841         }
1842         imark = i;
1843         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1844         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1845       }
1846       if (idx) {
1847         *idx = idx_p = mat->rowindices;
1848         if (imark > -1) {
1849           for (i=0; i<imark; i++) {
1850             idx_p[i] = cmap[cworkB[i]];
1851           }
1852         } else {
1853           for (i=0; i<nzB; i++) {
1854             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1855             else break;
1856           }
1857           imark = i;
1858         }
1859         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1860         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1861       }
1862     } else {
1863       if (idx) *idx = 0;
1864       if (v)   *v   = 0;
1865     }
1866   }
1867   *nz  = nztot;
1868   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1869   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1875 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1876 {
1877   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1878 
1879   PetscFunctionBegin;
1880   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1881   aij->getrowactive = PETSC_FALSE;
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatNorm_MPIAIJ"
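/*
   NORM_FROBENIUS: the local sums of |a_ij|^2 over both blocks are combined with
   MPI_Allreduce and the square root is taken.  NORM_1: per-column sums of
   absolute values are accumulated in a global-length work array and reduced.
   NORM_INFINITY: the largest local row sum is reduced with MPI_MAX.  The two
   norm is not supported.
*/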
1887 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1888 {
1889   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1890   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1891   PetscErrorCode ierr;
1892   PetscInt       i,j,cstart = mat->cmap->rstart;
1893   PetscReal      sum = 0.0;
1894   MatScalar      *v;
1895 
1896   PetscFunctionBegin;
1897   if (aij->size == 1) {
1898     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1899   } else {
1900     if (type == NORM_FROBENIUS) {
1901       v = amat->a;
1902       for (i=0; i<amat->nz; i++) {
1903         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1904       }
1905       v = bmat->a;
1906       for (i=0; i<bmat->nz; i++) {
1907         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1908       }
1909       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1910       *norm = PetscSqrtReal(*norm);
1911     } else if (type == NORM_1) { /* max column norm */
1912       PetscReal *tmp,*tmp2;
1913       PetscInt  *jj,*garray = aij->garray;
1914       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1915       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1916       *norm = 0.0;
1917       v     = amat->a; jj = amat->j;
1918       for (j=0; j<amat->nz; j++) {
1919         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1920       }
1921       v = bmat->a; jj = bmat->j;
1922       for (j=0; j<bmat->nz; j++) {
1923         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1924       }
1925       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1926       for (j=0; j<mat->cmap->N; j++) {
1927         if (tmp2[j] > *norm) *norm = tmp2[j];
1928       }
1929       ierr = PetscFree(tmp);CHKERRQ(ierr);
1930       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1931     } else if (type == NORM_INFINITY) { /* max row norm */
1932       PetscReal ntemp = 0.0;
1933       for (j=0; j<aij->A->rmap->n; j++) {
1934         v   = amat->a + amat->i[j];
1935         sum = 0.0;
1936         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1937           sum += PetscAbsScalar(*v); v++;
1938         }
1939         v = bmat->a + bmat->i[j];
1940         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1941           sum += PetscAbsScalar(*v); v++;
1942         }
1943         if (sum > ntemp) ntemp = sum;
1944       }
1945       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1946     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatTranspose_MPIAIJ"
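/*
   Builds the transpose by counting the column occurrences of the local blocks for
   preallocation (a PetscSF reduction maps the off-diagonal counts to the owning
   processes) and then inserting each local row of A and B as a column of the
   result with MatSetValues().  In-place transposition (MAT_REUSE_MATRIX with
   *matout == A) is only allowed for square matrices.
*/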
1953 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1954 {
1955   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1956   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1957   PetscErrorCode ierr;
1958   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1959   PetscInt       cstart = A->cmap->rstart,ncol;
1960   Mat            B;
1961   MatScalar      *array;
1962 
1963   PetscFunctionBegin;
1964   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1965 
1966   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1967   ai = Aloc->i; aj = Aloc->j;
1968   bi = Bloc->i; bj = Bloc->j;
1969   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1970     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1971     PetscSFNode          *oloc;
1972     PETSC_UNUSED PetscSF sf;
1973 
1974     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1975     /* compute d_nnz for preallocation */
1976     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1977     for (i=0; i<ai[ma]; i++) {
1978       d_nnz[aj[i]]++;
1979       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1980     }
1981     /* compute local off-diagonal contributions */
1982     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1983     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1984     /* map those to global */
1985     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1986     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1987     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1988     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1989     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1990     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1991     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1992 
1993     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1994     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1995     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1996     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1997     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1998     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1999   } else {
2000     B    = *matout;
2001     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2002     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2003   }
2004 
2005   /* copy over the A part */
2006   array = Aloc->a;
2007   row   = A->rmap->rstart;
2008   for (i=0; i<ma; i++) {
2009     ncol = ai[i+1]-ai[i];
2010     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2011     row++;
2012     array += ncol; aj += ncol;
2013   }
2014   aj = Aloc->j;
2015   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2016 
2017   /* copy over the B part */
2018   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2019   array = Bloc->a;
2020   row   = A->rmap->rstart;
2021   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2022   cols_tmp = cols;
2023   for (i=0; i<mb; i++) {
2024     ncol = bi[i+1]-bi[i];
2025     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2026     row++;
2027     array += ncol; cols_tmp += ncol;
2028   }
2029   ierr = PetscFree(cols);CHKERRQ(ierr);
2030 
2031   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2034     *matout = B;
2035   } else {
2036     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2037   }
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
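/*
   Computes mat = diag(ll)*mat*diag(rr).  The right scaling of the off-diagonal
   block requires the ghost entries of rr, so the scatter of rr into aij->lvec is
   started first and overlapped with scaling the diagonal block.
*/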
2043 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2044 {
2045   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2046   Mat            a    = aij->A,b = aij->B;
2047   PetscErrorCode ierr;
2048   PetscInt       s1,s2,s3;
2049 
2050   PetscFunctionBegin;
2051   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2052   if (rr) {
2053     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2054     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2055     /* Overlap communication with computation. */
2056     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2057   }
2058   if (ll) {
2059     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2060     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2061     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2062   }
2063   /* scale the diagonal block */
2064   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2065 
2066   if (rr) {
2067     /* Do a scatter end and then right scale the off-diagonal block */
2068     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2069     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2076 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2077 {
2078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2079   PetscErrorCode ierr;
2080 
2081   PetscFunctionBegin;
2082   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatEqual_MPIAIJ"
2088 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2089 {
2090   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2091   Mat            a,b,c,d;
2092   PetscBool      flg;
2093   PetscErrorCode ierr;
2094 
2095   PetscFunctionBegin;
2096   a = matA->A; b = matA->B;
2097   c = matB->A; d = matB->B;
2098 
2099   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2100   if (flg) {
2101     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2102   }
2103   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatCopy_MPIAIJ"
2109 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2110 {
2111   PetscErrorCode ierr;
2112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2113   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2114 
2115   PetscFunctionBegin;
2116   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2117   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2118     /* Because of the column compression in the off-processor part of the matrix a->B,
2119        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2120        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2121        could be provided by first uncompressing the a->B matrices and then copying the
2122        submatrices */
2123     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2124   } else {
2125     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2126     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatSetUp_MPIAIJ"
2133 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2134 {
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2144 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
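/* Each nnz[i] is the size of the union of the two global column sets of row i,
   e.g. (illustrative) X = {0,3,7} and Y = {3,5} give nnz[i] = 4 */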
2145 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2146 {
2147   PetscInt       i,m=Y->rmap->N;
2148   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2149   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2150   const PetscInt *xi = x->i,*yi = y->i;
2151 
2152   PetscFunctionBegin;
2153   /* Set the number of nonzeros in the new matrix */
2154   for (i=0; i<m; i++) {
2155     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2156     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2157     nnz[i] = 0;
2158     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2159       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2160       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2161       nnz[i]++;
2162     }
2163     for (; k<nzy; k++) nnz[i]++;
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatAXPY_MPIAIJ"
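/*
   Y = a*X + Y.  With SAME_NONZERO_PATTERN the stored values of both blocks are
   combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN X's entries are
   scattered into Y through a cached xtoy index; otherwise a matrix with the
   union nonzero pattern is preallocated, filled, and swapped into Y with
   MatHeaderReplace().
*/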
2170 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2171 {
2172   PetscErrorCode ierr;
2173   PetscInt       i;
2174   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2175   PetscBLASInt   bnz,one=1;
2176   Mat_SeqAIJ     *x,*y;
2177 
2178   PetscFunctionBegin;
2179   if (str == SAME_NONZERO_PATTERN) {
2180     PetscScalar alpha = a;
2181     x    = (Mat_SeqAIJ*)xx->A->data;
2182     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2183     y    = (Mat_SeqAIJ*)yy->A->data;
2184     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2185     x    = (Mat_SeqAIJ*)xx->B->data;
2186     y    = (Mat_SeqAIJ*)yy->B->data;
2187     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2188     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2189     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2190   } else if (str == SUBSET_NONZERO_PATTERN) {
2191     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2192 
2193     x = (Mat_SeqAIJ*)xx->B->data;
2194     y = (Mat_SeqAIJ*)yy->B->data;
2195     if (y->xtoy && y->XtoY != xx->B) {
2196       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2197       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2198     }
2199     if (!y->xtoy) { /* get xtoy */
2200       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2201       y->XtoY = xx->B;
2202       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2203     }
2204     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2205     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2206   } else {
2207     Mat      B;
2208     PetscInt *nnz_d,*nnz_o;
2209     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2210     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2211     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2212     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2213     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2214     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2215     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2216     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2217     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2218     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2219     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2220     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2221     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2222     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2223   }
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatConjugate_MPIAIJ"
2231 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2232 {
2233 #if defined(PETSC_USE_COMPLEX)
2234   PetscErrorCode ierr;
2235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2236 
2237   PetscFunctionBegin;
2238   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2239   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2240 #else
2241   PetscFunctionBegin;
2242 #endif
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatRealPart_MPIAIJ"
2248 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2249 {
2250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2251   PetscErrorCode ierr;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2255   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2261 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2262 {
2263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2264   PetscErrorCode ierr;
2265 
2266   PetscFunctionBegin;
2267   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2268   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #if defined(PETSC_HAVE_PBGL)
2273 
2274 #include <boost/parallel/mpi/bsp_process_group.hpp>
2275 #include <boost/graph/distributed/ilu_default_graph.hpp>
2276 #include <boost/graph/distributed/ilu_0_block.hpp>
2277 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2278 #include <boost/graph/distributed/petsc/interface.hpp>
2279 #include <boost/multi_array.hpp>
2280 #include <boost/parallel/distributed_property_map.hpp>
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2284 /*
2285   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2286 */
2287 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2288 {
2289   namespace petsc = boost::distributed::petsc;
2290 
2291   namespace graph_dist = boost::graph::distributed;
2292   using boost::graph::distributed::ilu_default::process_group_type;
2293   using boost::graph::ilu_permuted;
2294 
2295   PetscBool      row_identity, col_identity;
2296   PetscContainer c;
2297   PetscInt       m, n, M, N;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2302   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2303   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2304   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2305 
2306   process_group_type pg;
2307   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2308   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2309   lgraph_type& level_graph = *lgraph_p;
2310   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2311 
2312   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2313   ilu_permuted(level_graph);
2314 
2315   /* put together the new matrix */
2316   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2317   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2318   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2319   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2320   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2321   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2322   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2323   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2324 
2325   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2326   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2327   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2328   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2334 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2335 {
2336   PetscFunctionBegin;
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatSolve_MPIAIJ"
2342 /*
2343   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2344 */
2345 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2346 {
2347   namespace graph_dist = boost::graph::distributed;
2348 
2349   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2350   lgraph_type    *lgraph_p;
2351   PetscContainer c;
2352   PetscErrorCode ierr;
2353 
2354   PetscFunctionBegin;
2355   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2356   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2357   ierr = VecCopy(b, x);CHKERRQ(ierr);
2358 
2359   PetscScalar *array_x;
2360   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2361   PetscInt sx;
2362   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2363 
2364   PetscScalar *array_b;
2365   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2366   PetscInt sb;
2367   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2368 
2369   lgraph_type& level_graph = *lgraph_p;
2370   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2371 
2372   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2373   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2374   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2375 
2376   typedef boost::iterator_property_map<array_ref_type::iterator,
2377                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2378   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2379   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2380 
2381   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2382   PetscFunctionReturn(0);
2383 }
2384 #endif
2385 
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
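/*
   Builds, on each subcommunicator, a redundant copy of the entire matrix.  Each
   process packs its local rows (column indices in ascending global order) into
   sbuf_j/sbuf_a and exchanges them with its counterpart processes in the other
   subcommunicators using three message rounds: nonzero counts (tag1), row
   pointers plus column indices (tag2), and finally the numerical values (tag3).
*/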
2389 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2390 {
2391   PetscMPIInt    rank,size;
2392   MPI_Comm       comm;
2393   PetscErrorCode ierr;
2394   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2395   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2396   PetscInt       *rowrange = mat->rmap->range;
2397   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2398   Mat            A = aij->A,B=aij->B,C=*matredundant;
2399   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2400   PetscScalar    *sbuf_a;
2401   PetscInt       nzlocal=a->nz+b->nz;
2402   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2403   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2404   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2405   MatScalar      *aworkA,*aworkB;
2406   PetscScalar    *vals;
2407   PetscMPIInt    tag1,tag2,tag3,imdex;
2408   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2409   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2410   MPI_Status     recv_status,*send_status;
2411   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2412   PetscInt       **rbuf_j=NULL;
2413   PetscScalar    **rbuf_a=NULL;
2414   Mat_Redundant  *redund =NULL;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2418   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2420   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2421   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2422 
2423   if (reuse == MAT_REUSE_MATRIX) {
2424     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2425     if (subsize == 1) {
2426       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2427       redund = c->redundant;
2428     } else {
2429       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2430       redund = c->redundant;
2431     }
2432     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2433 
2434     nsends    = redund->nsends;
2435     nrecvs    = redund->nrecvs;
2436     send_rank = redund->send_rank;
2437     recv_rank = redund->recv_rank;
2438     sbuf_nz   = redund->sbuf_nz;
2439     rbuf_nz   = redund->rbuf_nz;
2440     sbuf_j    = redund->sbuf_j;
2441     sbuf_a    = redund->sbuf_a;
2442     rbuf_j    = redund->rbuf_j;
2443     rbuf_a    = redund->rbuf_a;
2444   }
2445 
2446   if (reuse == MAT_INITIAL_MATRIX) {
2447     PetscInt    nleftover,np_subcomm;
2448 
2449     /* determine the destination processor ids (send_rank), nsends and nrecvs */
2450     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2451 
2452     np_subcomm = size/nsubcomm;
2453     nleftover  = size - nsubcomm*np_subcomm;
2454 
2455     /* the block of code below is specific to the INTERLACED ordering */
2456     /* ------------------------------------------------*/
2457     nsends = 0; nrecvs = 0;
2458     for (i=0; i<size; i++) {
2459       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2460         send_rank[nsends++] = i;
2461         recv_rank[nrecvs++] = i;
2462       }
2463     }
2464     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2465       i = size-nleftover-1;
2466       j = 0;
2467       while (j < nsubcomm - nleftover) {
2468         send_rank[nsends++] = i;
2469         i--; j++;
2470       }
2471     }
2472 
2473     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2474       for (i=0; i<nleftover; i++) {
2475         recv_rank[nrecvs++] = size-nleftover+i;
2476       }
2477     }
2478     /*----------------------------------------------*/
2479 
2480     /* allocate sbuf_j, sbuf_a */
2481     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2482     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2483     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2484     /*
2485     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2486     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2487      */
2488   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2489 
2490   /* copy mat's local entries into the buffers */
2491   if (reuse == MAT_INITIAL_MATRIX) {
2492     rownz_max = 0;
2493     rptr      = sbuf_j;
2494     cols      = sbuf_j + rend-rstart + 1;
2495     vals      = sbuf_a;
2496     rptr[0]   = 0;
2497     for (i=0; i<rend-rstart; i++) {
2498       row    = i + rstart;
2499       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2500       ncols  = nzA + nzB;
2501       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2502       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2503       /* load the column indices for this row into cols */
2504       lwrite = 0;
2505       for (l=0; l<nzB; l++) {
2506         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2507           vals[lwrite]   = aworkB[l];
2508           cols[lwrite++] = ctmp;
2509         }
2510       }
2511       for (l=0; l<nzA; l++) {
2512         vals[lwrite]   = aworkA[l];
2513         cols[lwrite++] = cstart + cworkA[l];
2514       }
2515       for (l=0; l<nzB; l++) {
2516         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2517           vals[lwrite]   = aworkB[l];
2518           cols[lwrite++] = ctmp;
2519         }
2520       }
2521       vals     += ncols;
2522       cols     += ncols;
2523       rptr[i+1] = rptr[i] + ncols;
2524       if (rownz_max < ncols) rownz_max = ncols;
2525     }
2526     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2527   } else { /* only copy matrix values into sbuf_a */
2528     rptr    = sbuf_j;
2529     vals    = sbuf_a;
2530     rptr[0] = 0;
2531     for (i=0; i<rend-rstart; i++) {
2532       row    = i + rstart;
2533       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2534       ncols  = nzA + nzB;
2535       cworkB = b->j + b->i[i];
2536       aworkA = a->a + a->i[i];
2537       aworkB = b->a + b->i[i];
2538       lwrite = 0;
2539       for (l=0; l<nzB; l++) {
2540         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2541       }
2542       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2543       for (l=0; l<nzB; l++) {
2544         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2545       }
2546       vals     += ncols;
2547       rptr[i+1] = rptr[i] + ncols;
2548     }
2549   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2550 
2551   /* send nzlocal to others, and recv other's nzlocal */
2552   /*--------------------------------------------------*/
2553   if (reuse == MAT_INITIAL_MATRIX) {
2554     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2555 
2556     s_waits2 = s_waits3 + nsends;
2557     s_waits1 = s_waits2 + nsends;
2558     r_waits1 = s_waits1 + nsends;
2559     r_waits2 = r_waits1 + nrecvs;
2560     r_waits3 = r_waits2 + nrecvs;
2561   } else {
2562     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2563 
2564     r_waits3 = s_waits3 + nsends;
2565   }
2566 
2567   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2568   if (reuse == MAT_INITIAL_MATRIX) {
2569     /* get new tags to keep the communication clean */
2570     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2571     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2572     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2573 
2574     /* post receives of other's nzlocal */
2575     for (i=0; i<nrecvs; i++) {
2576       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2577     }
2578     /* send nzlocal to others */
2579     for (i=0; i<nsends; i++) {
2580       sbuf_nz[i] = nzlocal;
2581       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2582     }
2583     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2584     count = nrecvs;
2585     while (count) {
2586       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2587 
2588       recv_rank[imdex] = recv_status.MPI_SOURCE;
2589       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2590       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2591 
2592       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2593 
2594       rbuf_nz[imdex] += i + 2;
2595 
2596       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2597       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2598       count--;
2599     }
2600     /* wait on sends of nzlocal */
2601     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2602     /* send mat->i,j to others, and recv from others */
2603     /*------------------------------------------------*/
2604     for (i=0; i<nsends; i++) {
2605       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2606       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2607     }
2608     /* wait on receives of mat->i,j */
2609     /*------------------------------*/
2610     count = nrecvs;
2611     while (count) {
2612       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2613       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2614       count--;
2615     }
2616     /* wait on sends of mat->i,j */
2617     /*---------------------------*/
2618     if (nsends) {
2619       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2620     }
2621   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2622 
2623   /* post receives, send and receive mat->a */
2624   /*----------------------------------------*/
2625   for (imdex=0; imdex<nrecvs; imdex++) {
2626     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2627   }
2628   for (i=0; i<nsends; i++) {
2629     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2630   }
2631   count = nrecvs;
2632   while (count) {
2633     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2634     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2635     count--;
2636   }
2637   if (nsends) {
2638     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2639   }
2640 
2641   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2642 
2643   /* create redundant matrix */
2644   /*-------------------------*/
2645   if (reuse == MAT_INITIAL_MATRIX) {
2646     const PetscInt *range;
2647     PetscInt       rstart_sub,rend_sub,mloc_sub;
2648 
2649     /* compute rownz_max for preallocation */
2650     for (imdex=0; imdex<nrecvs; imdex++) {
2651       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2652       rptr = rbuf_j[imdex];
2653       for (i=0; i<j; i++) {
2654         ncols = rptr[i+1] - rptr[i];
2655         if (rownz_max < ncols) rownz_max = ncols;
2656       }
2657     }
2658 
2659     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2660 
2661     /* get local size of redundant matrix
2662        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2663     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2664     rstart_sub = range[nsubcomm*subrank];
2665     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2666       rend_sub = range[nsubcomm*(subrank+1)];
2667     } else {
2668       rend_sub = mat->rmap->N;
2669     }
2670     mloc_sub = rend_sub - rstart_sub;
2671 
2672     if (M == N) {
2673       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2674     } else { /* non-square matrix */
2675       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2676     }
2677     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2678     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2679     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2680     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2681   } else {
2682     C = *matredundant;
2683   }
2684 
2685   /* insert local matrix entries */
2686   rptr = sbuf_j;
2687   cols = sbuf_j + rend-rstart + 1;
2688   vals = sbuf_a;
2689   for (i=0; i<rend-rstart; i++) {
2690     row   = i + rstart;
2691     ncols = rptr[i+1] - rptr[i];
2692     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2693     vals += ncols;
2694     cols += ncols;
2695   }
2696   /* insert received matrix entries */
2697   for (imdex=0; imdex<nrecvs; imdex++) {
2698     rstart = rowrange[recv_rank[imdex]];
2699     rend   = rowrange[recv_rank[imdex]+1];
2700     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2701     rptr   = rbuf_j[imdex];
2702     cols   = rbuf_j[imdex] + rend-rstart + 1;
2703     vals   = rbuf_a[imdex];
2704     for (i=0; i<rend-rstart; i++) {
2705       row   = i + rstart;
2706       ncols = rptr[i+1] - rptr[i];
2707       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2708       vals += ncols;
2709       cols += ncols;
2710     }
2711   }
2712   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2713   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2714 
2715   if (reuse == MAT_INITIAL_MATRIX) {
2716     *matredundant = C;
2717 
2718     /* create a supporting struct and attach it to C for reuse */
2719     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2720     if (subsize == 1) {
2721       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2722       c->redundant = redund;
2723     } else {
2724       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2725       c->redundant = redund;
2726     }
2727 
2728     redund->nzlocal   = nzlocal;
2729     redund->nsends    = nsends;
2730     redund->nrecvs    = nrecvs;
2731     redund->send_rank = send_rank;
2732     redund->recv_rank = recv_rank;
2733     redund->sbuf_nz   = sbuf_nz;
2734     redund->rbuf_nz   = rbuf_nz;
2735     redund->sbuf_j    = sbuf_j;
2736     redund->sbuf_a    = sbuf_a;
2737     redund->rbuf_j    = rbuf_j;
2738     redund->rbuf_a    = rbuf_a;
2739     redund->psubcomm  = NULL;
2740   }
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2746 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2747 {
2748   PetscErrorCode ierr;
2749   MPI_Comm       comm;
2750   PetscMPIInt    size,subsize;
2751   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2752   Mat_Redundant  *redund=NULL;
2753   PetscSubcomm   psubcomm=NULL;
2754   MPI_Comm       subcomm_in=subcomm;
2755   Mat            *matseq;
2756   IS             isrow,iscol;
2757 
2758   PetscFunctionBegin;
2759   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2760     if (reuse ==  MAT_INITIAL_MATRIX) {
2761       /* create psubcomm, then get subcomm */
2762       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2763       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2764       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2765 
2766       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2767       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2768       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2769       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2770       subcomm = psubcomm->comm;
2771     } else { /* retrieve psubcomm and subcomm */
2772       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2773       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2774       if (subsize == 1) {
2775         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2776         redund = c->redundant;
2777       } else {
2778         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2779         redund = c->redundant;
2780       }
2781       psubcomm = redund->psubcomm;
2782     }
2783     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2784       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2785       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2786         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2787         if (subsize == 1) {
2788           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2789           c->redundant->psubcomm = psubcomm;
2790         } else {
2791           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2792           c->redundant->psubcomm = psubcomm;
2793         }
2794       }
2795       PetscFunctionReturn(0);
2796     }
2797   }
2798 
2799   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2800   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2801   if (reuse == MAT_INITIAL_MATRIX) {
2802     /* create a local sequential matrix matseq[0] */
2803     mloc_sub = PETSC_DECIDE;
2804     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2805     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2806     rstart = rend - mloc_sub;
2807     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2808     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2809   } else { /* reuse == MAT_REUSE_MATRIX */
2810     if (subsize == 1) {
2811       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2812       redund = c->redundant;
2813     } else {
2814       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2815       redund = c->redundant;
2816     }
2817 
2818     isrow  = redund->isrow;
2819     iscol  = redund->iscol;
2820     matseq = redund->matseq;
2821   }
2822   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2823   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2824 
2825   if (reuse == MAT_INITIAL_MATRIX) {
2826     /* create a supporting struct and attach it to C for reuse */
2827     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2828     if (subsize == 1) {
2829       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2830       c->redundant = redund;
2831     } else {
2832       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2833       c->redundant = redund;
2834     }
2835     redund->isrow    = isrow;
2836     redund->iscol    = iscol;
2837     redund->matseq   = matseq;
2838     redund->psubcomm = psubcomm;
2839   }
2840   PetscFunctionReturn(0);
2841 }
2842 
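/*
   Editor's sketch (not part of the original source): a hedged, user-level example of
   how the redundant-matrix routines above are typically driven through the public
   MatGetRedundantMatrix() interface.  The matrix A and nsubcomm = 2 are assumptions
   made purely for illustration.

     Mat      A,Ared;
     PetscInt nsubcomm = 2;

     ... assemble A as a MATMPIAIJ matrix on PETSC_COMM_WORLD ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     ... later, after the numerical values of A (but not its nonzero pattern) change ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatDestroy(&Ared);CHKERRQ(ierr);
*/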
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2845 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2846 {
2847   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2848   PetscErrorCode ierr;
2849   PetscInt       i,*idxb = 0;
2850   PetscScalar    *va,*vb;
2851   Vec            vtmp;
2852 
2853   PetscFunctionBegin;
2854   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2855   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2856   if (idx) {
2857     for (i=0; i<A->rmap->n; i++) {
2858       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2859     }
2860   }
2861 
2862   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2863   if (idx) {
2864     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2865   }
2866   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2867   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2868 
2869   for (i=0; i<A->rmap->n; i++) {
2870     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2871       va[i] = vb[i];
2872       if (idx) idx[i] = a->garray[idxb[i]];
2873     }
2874   }
2875 
2876   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2877   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2878   ierr = PetscFree(idxb);CHKERRQ(ierr);
2879   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2885 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2886 {
2887   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2888   PetscErrorCode ierr;
2889   PetscInt       i,*idxb = 0;
2890   PetscScalar    *va,*vb;
2891   Vec            vtmp;
2892 
2893   PetscFunctionBegin;
2894   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2895   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2896   if (idx) {
2897     for (i=0; i<A->rmap->n; i++) {
2898       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2899     }
2900   }
2901 
2902   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2903   if (idx) {
2904     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2905   }
2906   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2907   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2908 
2909   for (i=0; i<A->rmap->n; i++) {
2910     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2911       va[i] = vb[i];
2912       if (idx) idx[i] = a->garray[idxb[i]];
2913     }
2914   }
2915 
2916   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2917   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2918   ierr = PetscFree(idxb);CHKERRQ(ierr);
2919   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 #undef __FUNCT__
2924 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2925 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2926 {
2927   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2928   PetscInt       n      = A->rmap->n;
2929   PetscInt       cstart = A->cmap->rstart;
2930   PetscInt       *cmap  = mat->garray;
2931   PetscInt       *diagIdx, *offdiagIdx;
2932   Vec            diagV, offdiagV;
2933   PetscScalar    *a, *diagA, *offdiagA;
2934   PetscInt       r;
2935   PetscErrorCode ierr;
2936 
2937   PetscFunctionBegin;
2938   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2939   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2940   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2941   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2942   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2943   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2944   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2945   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2946   for (r = 0; r < n; ++r) {
2947     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2948       a[r]   = diagA[r];
2949       idx[r] = cstart + diagIdx[r];
2950     } else {
2951       a[r]   = offdiagA[r];
2952       idx[r] = cmap[offdiagIdx[r]];
2953     }
2954   }
2955   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2956   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2957   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2958   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2959   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2960   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 #undef __FUNCT__
2965 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2966 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2967 {
2968   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2969   PetscInt       n      = A->rmap->n;
2970   PetscInt       cstart = A->cmap->rstart;
2971   PetscInt       *cmap  = mat->garray;
2972   PetscInt       *diagIdx, *offdiagIdx;
2973   Vec            diagV, offdiagV;
2974   PetscScalar    *a, *diagA, *offdiagA;
2975   PetscInt       r;
2976   PetscErrorCode ierr;
2977 
2978   PetscFunctionBegin;
2979   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2980   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2981   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2982   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2983   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2984   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2985   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2986   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2987   for (r = 0; r < n; ++r) {
2988     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2989       a[r]   = diagA[r];
2990       idx[r] = cstart + diagIdx[r];
2991     } else {
2992       a[r]   = offdiagA[r];
2993       idx[r] = cmap[offdiagIdx[r]];
2994     }
2995   }
2996   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2997   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2998   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2999   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3000   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3001   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3002   PetscFunctionReturn(0);
3003 }
3004 
3005 #undef __FUNCT__
3006 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3007 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3008 {
3009   PetscErrorCode ierr;
3010   Mat            *dummy;
3011 
3012   PetscFunctionBegin;
3013   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3014   *newmat = *dummy;
3015   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 #undef __FUNCT__
3020 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3021 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3022 {
3023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3033 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3034 {
3035   PetscErrorCode ierr;
3036   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3037 
3038   PetscFunctionBegin;
3039   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3040   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3041   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3042   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 /* -------------------------------------------------------------------*/
3047 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3048                                        MatGetRow_MPIAIJ,
3049                                        MatRestoreRow_MPIAIJ,
3050                                        MatMult_MPIAIJ,
3051                                 /* 4*/ MatMultAdd_MPIAIJ,
3052                                        MatMultTranspose_MPIAIJ,
3053                                        MatMultTransposeAdd_MPIAIJ,
3054 #if defined(PETSC_HAVE_PBGL)
3055                                        MatSolve_MPIAIJ,
3056 #else
3057                                        0,
3058 #endif
3059                                        0,
3060                                        0,
3061                                 /*10*/ 0,
3062                                        0,
3063                                        0,
3064                                        MatSOR_MPIAIJ,
3065                                        MatTranspose_MPIAIJ,
3066                                 /*15*/ MatGetInfo_MPIAIJ,
3067                                        MatEqual_MPIAIJ,
3068                                        MatGetDiagonal_MPIAIJ,
3069                                        MatDiagonalScale_MPIAIJ,
3070                                        MatNorm_MPIAIJ,
3071                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3072                                        MatAssemblyEnd_MPIAIJ,
3073                                        MatSetOption_MPIAIJ,
3074                                        MatZeroEntries_MPIAIJ,
3075                                 /*24*/ MatZeroRows_MPIAIJ,
3076                                        0,
3077 #if defined(PETSC_HAVE_PBGL)
3078                                        0,
3079 #else
3080                                        0,
3081 #endif
3082                                        0,
3083                                        0,
3084                                 /*29*/ MatSetUp_MPIAIJ,
3085 #if defined(PETSC_HAVE_PBGL)
3086                                        0,
3087 #else
3088                                        0,
3089 #endif
3090                                        0,
3091                                        0,
3092                                        0,
3093                                 /*34*/ MatDuplicate_MPIAIJ,
3094                                        0,
3095                                        0,
3096                                        0,
3097                                        0,
3098                                 /*39*/ MatAXPY_MPIAIJ,
3099                                        MatGetSubMatrices_MPIAIJ,
3100                                        MatIncreaseOverlap_MPIAIJ,
3101                                        MatGetValues_MPIAIJ,
3102                                        MatCopy_MPIAIJ,
3103                                 /*44*/ MatGetRowMax_MPIAIJ,
3104                                        MatScale_MPIAIJ,
3105                                        0,
3106                                        0,
3107                                        MatZeroRowsColumns_MPIAIJ,
3108                                 /*49*/ MatSetRandom_MPIAIJ,
3109                                        0,
3110                                        0,
3111                                        0,
3112                                        0,
3113                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3114                                        0,
3115                                        MatSetUnfactored_MPIAIJ,
3116                                        MatPermute_MPIAIJ,
3117                                        0,
3118                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3119                                        MatDestroy_MPIAIJ,
3120                                        MatView_MPIAIJ,
3121                                        0,
3122                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3123                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3124                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3125                                        0,
3126                                        0,
3127                                        0,
3128                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3129                                        MatGetRowMinAbs_MPIAIJ,
3130                                        0,
3131                                        MatSetColoring_MPIAIJ,
3132                                        0,
3133                                        MatSetValuesAdifor_MPIAIJ,
3134                                 /*75*/ MatFDColoringApply_AIJ,
3135                                        0,
3136                                        0,
3137                                        0,
3138                                        MatFindZeroDiagonals_MPIAIJ,
3139                                 /*80*/ 0,
3140                                        0,
3141                                        0,
3142                                 /*83*/ MatLoad_MPIAIJ,
3143                                        0,
3144                                        0,
3145                                        0,
3146                                        0,
3147                                        0,
3148                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3149                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3150                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3151                                        MatPtAP_MPIAIJ_MPIAIJ,
3152                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3153                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3154                                        0,
3155                                        0,
3156                                        0,
3157                                        0,
3158                                 /*99*/ 0,
3159                                        0,
3160                                        0,
3161                                        MatConjugate_MPIAIJ,
3162                                        0,
3163                                 /*104*/MatSetValuesRow_MPIAIJ,
3164                                        MatRealPart_MPIAIJ,
3165                                        MatImaginaryPart_MPIAIJ,
3166                                        0,
3167                                        0,
3168                                 /*109*/0,
3169                                        MatGetRedundantMatrix_MPIAIJ,
3170                                        MatGetRowMin_MPIAIJ,
3171                                        0,
3172                                        0,
3173                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3174                                        0,
3175                                        0,
3176                                        0,
3177                                        0,
3178                                 /*119*/0,
3179                                        0,
3180                                        0,
3181                                        0,
3182                                        MatGetMultiProcBlock_MPIAIJ,
3183                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3184                                        MatGetColumnNorms_MPIAIJ,
3185                                        MatInvertBlockDiagonal_MPIAIJ,
3186                                        0,
3187                                        MatGetSubMatricesParallel_MPIAIJ,
3188                                 /*129*/0,
3189                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3190                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3191                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3192                                        0,
3193                                 /*134*/0,
3194                                        0,
3195                                        0,
3196                                        0,
3197                                        0,
3198                                 /*139*/0,
3199                                        0,
3200                                        0,
3201                                        MatFDColoringSetUp_MPIXAIJ
3202 };
3203 
3204 /* ----------------------------------------------------------------------------------------*/
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3208 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3209 {
3210   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3215   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3221 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3222 {
3223   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3224   PetscErrorCode ierr;
3225 
3226   PetscFunctionBegin;
3227   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3228   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 #undef __FUNCT__
3233 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3234 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3235 {
3236   Mat_MPIAIJ     *b;
3237   PetscErrorCode ierr;
3238 
3239   PetscFunctionBegin;
3240   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3242   b = (Mat_MPIAIJ*)B->data;
3243 
3244   if (!B->preallocated) {
3245     /* Explicitly create 2 MATSEQAIJ matrices. */
3246     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3247     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3248     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3249     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3250     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3251     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3252     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3253     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3254     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3255     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3256   }
3257 
3258   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3259   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3260   B->preallocated = PETSC_TRUE;
3261   PetscFunctionReturn(0);
3262 }
3263 
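/*
   Editor's sketch (not part of the original source): a hedged example of calling the
   preallocation routine above through the public MatMPIAIJSetPreallocation() interface.
   The global size 100 and the per-row estimates (5 diagonal, 2 off-diagonal nonzeros)
   are assumptions made purely for illustration.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/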
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3266 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3267 {
3268   Mat            mat;
3269   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3270   PetscErrorCode ierr;
3271 
3272   PetscFunctionBegin;
3273   *newmat = 0;
3274   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3275   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3276   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3277   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3278   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3279   a       = (Mat_MPIAIJ*)mat->data;
3280 
3281   mat->factortype   = matin->factortype;
3282   mat->assembled    = PETSC_TRUE;
3283   mat->insertmode   = NOT_SET_VALUES;
3284   mat->preallocated = PETSC_TRUE;
3285 
3286   a->size         = oldmat->size;
3287   a->rank         = oldmat->rank;
3288   a->donotstash   = oldmat->donotstash;
3289   a->roworiented  = oldmat->roworiented;
3290   a->rowindices   = 0;
3291   a->rowvalues    = 0;
3292   a->getrowactive = PETSC_FALSE;
3293 
3294   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3295   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3296 
3297   if (oldmat->colmap) {
3298 #if defined(PETSC_USE_CTABLE)
3299     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3300 #else
3301     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3302     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3303     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3304 #endif
3305   } else a->colmap = 0;
3306   if (oldmat->garray) {
3307     PetscInt len;
3308     len  = oldmat->B->cmap->n;
3309     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3310     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3311     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3312   } else a->garray = 0;
3313 
3314   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3315   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3316   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3317   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3318   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3319   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3320   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3321   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3322   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3323   *newmat = mat;
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatLoad_MPIAIJ"
3331 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3332 {
3333   PetscScalar    *vals,*svals;
3334   MPI_Comm       comm;
3335   PetscErrorCode ierr;
3336   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3337   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3338   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3339   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3340   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3341   int            fd;
3342   PetscInt       bs = 1;
3343 
3344   PetscFunctionBegin;
3345   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3346   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3347   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3348   if (!rank) {
3349     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3350     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3351     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3352   }
3353 
3354   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3355   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3356   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3357 
3358   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3359 
3360   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3361   M    = header[1]; N = header[2];
3362   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3363   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3364   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3365 
3366   /* If global sizes are set, check if they are consistent with that given in the file */
3367   if (sizesset) {
3368     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3369   }
3370   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3371   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3372 
3373   /* determine ownership of all (block) rows */
3374   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3375   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3376   else m = newMat->rmap->n; /* Set by user */
3377 
3378   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3379   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3380 
3381   /* First process needs enough room for process with most rows */
3382   if (!rank) {
3383     mmax = rowners[1];
3384     for (i=2; i<=size; i++) {
3385       mmax = PetscMax(mmax, rowners[i]);
3386     }
3387   } else mmax = -1;             /* unused, but compilers complain */
3388 
3389   rowners[0] = 0;
3390   for (i=2; i<=size; i++) {
3391     rowners[i] += rowners[i-1];
3392   }
3393   rstart = rowners[rank];
3394   rend   = rowners[rank+1];
3395 
3396   /* distribute row lengths to all processors */
3397   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3398   if (!rank) {
3399     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3400     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3401     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3402     for (j=0; j<m; j++) {
3403       procsnz[0] += ourlens[j];
3404     }
3405     for (i=1; i<size; i++) {
3406       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3407       /* calculate the number of nonzeros on each processor */
3408       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3409         procsnz[i] += rowlengths[j];
3410       }
3411       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3412     }
3413     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3414   } else {
3415     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3416   }
3417 
3418   if (!rank) {
3419     /* determine max buffer needed and allocate it */
3420     maxnz = 0;
3421     for (i=0; i<size; i++) {
3422       maxnz = PetscMax(maxnz,procsnz[i]);
3423     }
3424     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3425 
3426     /* read in my part of the matrix column indices  */
3427     nz   = procsnz[0];
3428     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3429     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3430 
3431     /* read in everyone else's and ship them off */
3432     for (i=1; i<size; i++) {
3433       nz   = procsnz[i];
3434       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3435       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3436     }
3437     ierr = PetscFree(cols);CHKERRQ(ierr);
3438   } else {
3439     /* determine buffer space needed for message */
3440     nz = 0;
3441     for (i=0; i<m; i++) {
3442       nz += ourlens[i];
3443     }
3444     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3445 
3446     /* receive message of column indices*/
3447     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3448   }
3449 
3450   /* determine column ownership if matrix is not square */
3451   if (N != M) {
3452     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3453     else n = newMat->cmap->n;
3454     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3455     cstart = cend - n;
3456   } else {
3457     cstart = rstart;
3458     cend   = rend;
3459     n      = cend - cstart;
3460   }
3461 
3462   /* loop over local rows, determining number of off diagonal entries */
3463   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3464   jj   = 0;
3465   for (i=0; i<m; i++) {
3466     for (j=0; j<ourlens[i]; j++) {
3467       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3468       jj++;
3469     }
3470   }
3471 
3472   for (i=0; i<m; i++) {
3473     ourlens[i] -= offlens[i];
3474   }
3475   if (!sizesset) {
3476     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3477   }
3478 
3479   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3480 
3481   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3482 
3483   for (i=0; i<m; i++) {
3484     ourlens[i] += offlens[i];
3485   }
3486 
3487   if (!rank) {
3488     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3489 
3490     /* read in my part of the matrix numerical values  */
3491     nz   = procsnz[0];
3492     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3493 
3494     /* insert into matrix */
3495     jj      = rstart;
3496     smycols = mycols;
3497     svals   = vals;
3498     for (i=0; i<m; i++) {
3499       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3500       smycols += ourlens[i];
3501       svals   += ourlens[i];
3502       jj++;
3503     }
3504 
3505     /* read in other processors and ship out */
3506     for (i=1; i<size; i++) {
3507       nz   = procsnz[i];
3508       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3509       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3510     }
3511     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3512   } else {
3513     /* receive numeric values */
3514     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3515 
3516     /* receive message of values*/
3517     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3518 
3519     /* insert into matrix */
3520     jj      = rstart;
3521     smycols = mycols;
3522     svals   = vals;
3523     for (i=0; i<m; i++) {
3524       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3525       smycols += ourlens[i];
3526       svals   += ourlens[i];
3527       jj++;
3528     }
3529   }
3530   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3531   ierr = PetscFree(vals);CHKERRQ(ierr);
3532   ierr = PetscFree(mycols);CHKERRQ(ierr);
3533   ierr = PetscFree(rowners);CHKERRQ(ierr);
3534   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3535   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3536   PetscFunctionReturn(0);
3537 }
3538 
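/*
   Editor's sketch (not part of the original source): a hedged example of driving
   MatLoad_MPIAIJ() above through the public MatLoad() interface with a binary viewer.
   The file name "matrix.dat" is an assumption made purely for illustration.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/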
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3541 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3542 {
3543   PetscErrorCode ierr;
3544   IS             iscol_local;
3545   PetscInt       csize;
3546 
3547   PetscFunctionBegin;
3548   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3549   if (call == MAT_REUSE_MATRIX) {
3550     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3551     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3552   } else {
3553     PetscInt cbs;
3554     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3555     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3556     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3557   }
3558   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3559   if (call == MAT_INITIAL_MATRIX) {
3560     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3561     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3562   }
3563   PetscFunctionReturn(0);
3564 }
3565 
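/*
   Editor's sketch (not part of the original source): a hedged example of calling
   MatGetSubMatrix_MPIAIJ() above through the public MatGetSubMatrix() interface.
   The choice of the first 10 locally owned rows/columns is an assumption made
   purely for illustration and presumes every process owns at least 10 rows.

     Mat      A,Asub;
     IS       isrow,iscol;
     PetscInt rstart;

     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,10,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,10,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/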
3566 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3567 #undef __FUNCT__
3568 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3569 /*
3570     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3571   submatrix, and then the final result obtained by concatenating the local matrices.
3572   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3573 
3574   Note: This requires a sequential iscol with all indices.
3575 */
3576 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3577 {
3578   PetscErrorCode ierr;
3579   PetscMPIInt    rank,size;
3580   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3581   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3582   PetscBool      allcolumns, colflag;
3583   Mat            M,Mreuse;
3584   MatScalar      *vwork,*aa;
3585   MPI_Comm       comm;
3586   Mat_SeqAIJ     *aij;
3587 
3588   PetscFunctionBegin;
3589   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3590   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3592 
3593   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3594   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3595   if (colflag && ncol == mat->cmap->N) {
3596     allcolumns = PETSC_TRUE;
3597   } else {
3598     allcolumns = PETSC_FALSE;
3599   }
3600   if (call ==  MAT_REUSE_MATRIX) {
3601     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3602     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3603     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3604   } else {
3605     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3606   }
3607 
3608   /*
3609       m - number of local rows
3610       n - number of columns (same on all processors)
3611       rstart - first row in new global matrix generated
3612   */
3613   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3614   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3615   if (call == MAT_INITIAL_MATRIX) {
3616     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3617     ii  = aij->i;
3618     jj  = aij->j;
3619 
3620     /*
3621         Determine the number of non-zeros in the diagonal and off-diagonal
3622         portions of the matrix in order to do correct preallocation
3623     */
3624 
3625     /* first get start and end of "diagonal" columns */
3626     if (csize == PETSC_DECIDE) {
3627       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3628       if (mglobal == n) { /* square matrix */
3629         nlocal = m;
3630       } else {
3631         nlocal = n/size + ((n % size) > rank);
3632       }
3633     } else {
3634       nlocal = csize;
3635     }
3636     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3637     rstart = rend - nlocal;
3638     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3639 
3640     /* next, compute all the lengths */
3641     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3642     olens = dlens + m;
3643     for (i=0; i<m; i++) {
3644       jend = ii[i+1] - ii[i];
3645       olen = 0;
3646       dlen = 0;
3647       for (j=0; j<jend; j++) {
3648         if (*jj < rstart || *jj >= rend) olen++;
3649         else dlen++;
3650         jj++;
3651       }
3652       olens[i] = olen;
3653       dlens[i] = dlen;
3654     }
3655     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3656     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3657     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3658     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3659     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3660     ierr = PetscFree(dlens);CHKERRQ(ierr);
3661   } else {
3662     PetscInt ml,nl;
3663 
3664     M    = *newmat;
3665     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3666     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3667     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3668     /*
3669          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3670        rather than the slower MatSetValues().
3671     */
3672     M->was_assembled = PETSC_TRUE;
3673     M->assembled     = PETSC_FALSE;
3674   }
3675   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3676   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3677   ii   = aij->i;
3678   jj   = aij->j;
3679   aa   = aij->a;
3680   for (i=0; i<m; i++) {
3681     row   = rstart + i;
3682     nz    = ii[i+1] - ii[i];
3683     cwork = jj;     jj += nz;
3684     vwork = aa;     aa += nz;
3685     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3686   }
3687 
3688   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3690   *newmat = M;
3691 
3692   /* save submatrix used in processor for next request */
3693   if (call ==  MAT_INITIAL_MATRIX) {
3694     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3695     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3696   }
3697   PetscFunctionReturn(0);
3698 }
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3702 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3703 {
3704   PetscInt       m,cstart, cend,j,nnz,i,d;
3705   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3706   const PetscInt *JJ;
3707   PetscScalar    *values;
3708   PetscErrorCode ierr;
3709 
3710   PetscFunctionBegin;
3711   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3712 
3713   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3714   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3715   m      = B->rmap->n;
3716   cstart = B->cmap->rstart;
3717   cend   = B->cmap->rend;
3718   rstart = B->rmap->rstart;
3719 
3720   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3721 
3722 #if defined(PETSC_USE_DEBUG)
3723   for (i=0; i<m; i++) {
3724     nnz = Ii[i+1]- Ii[i];
3725     JJ  = J + Ii[i];
3726     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3727     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3728     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3729   }
3730 #endif
3731 
3732   for (i=0; i<m; i++) {
3733     nnz     = Ii[i+1]- Ii[i];
3734     JJ      = J + Ii[i];
3735     nnz_max = PetscMax(nnz_max,nnz);
3736     d       = 0;
3737     for (j=0; j<nnz; j++) {
3738       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3739     }
3740     d_nnz[i] = d;
3741     o_nnz[i] = nnz - d;
3742   }
3743   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3744   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3745 
3746   if (v) values = (PetscScalar*)v;
3747   else {
3748     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3749   }
3750 
3751   for (i=0; i<m; i++) {
3752     ii   = i + rstart;
3753     nnz  = Ii[i+1]- Ii[i];
3754     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3755   }
3756   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3757   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3758 
3759   if (!v) {
3760     ierr = PetscFree(values);CHKERRQ(ierr);
3761   }
3762   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3763   PetscFunctionReturn(0);
3764 }
3765 
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3768 /*@
3769    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3770    (the default parallel PETSc format).
3771 
3772    Collective on MPI_Comm
3773 
3774    Input Parameters:
3775 +  B - the matrix
3776 .  i - the indices into j for the start of each local row (starts with zero)
3777 .  j - the column indices for each local row (starts with zero)
3778 -  v - optional values in the matrix
3779 
3780    Level: developer
3781 
3782    Notes:
3783        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3784      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3785      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3786 
3787        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3788 
3789        The format used for the sparse matrix input is equivalent to a
3790     row-major ordering, i.e. for the following matrix, the expected input data is
3791     as shown:
3792 
3793         1 0 0
3794         2 0 3     P0
3795        -------
3796         4 5 6     P1
3797 
3798      Process0 [P0]: rows_owned=[0,1]
3799         i =  {0,1,3}  [size = nrow+1  = 2+1]
3800         j =  {0,0,2}  [size = nz = 3]
3801         v =  {1,2,3}  [size = nz = 3]
3802 
3803      Process1 [P1]: rows_owned=[2]
3804         i =  {0,3}    [size = nrow+1  = 1+1]
3805         j =  {0,1,2}  [size = nz = 3]
3806         v =  {4,5,6}  [size = nz = 3]
3807 
3808 .keywords: matrix, aij, compressed row, sparse, parallel
3809 
3810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3811           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3812 @*/
3813 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3814 {
3815   PetscErrorCode ierr;
3816 
3817   PetscFunctionBegin;
3818   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3819   PetscFunctionReturn(0);
3820 }
3821 
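/*
   Editor's sketch (not part of the original source): a hedged example of the call made
   by process P0 from the manual page above (a 3x3 matrix, P0 owning rows 0 and 1);
   the other process would pass its own local i, j, v arrays.  Sizes and values follow
   the example in the manual page; everything else is illustrative.

     Mat         B;
     PetscInt    i[] = {0,1,3};        row pointers, length nrow+1
     PetscInt    j[] = {0,0,2};        global column indices, length nz
     PetscScalar v[] = {1.0,2.0,3.0};  values, length nz

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/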
3822 #undef __FUNCT__
3823 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3824 /*@C
3825    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3826    (the default parallel PETSc format).  For good matrix assembly performance
3827    the user should preallocate the matrix storage by setting the parameters
3828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3829    performance can be increased by more than a factor of 50.
3830 
3831    Collective on MPI_Comm
3832 
3833    Input Parameters:
3834 +  A - the matrix
3835 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3836            (same value is used for all local rows)
3837 .  d_nnz - array containing the number of nonzeros in the various rows of the
3838            DIAGONAL portion of the local submatrix (possibly different for each row)
3839            or NULL, if d_nz is used to specify the nonzero structure.
3840            The size of this array is equal to the number of local rows, i.e 'm'.
3841            For matrices that will be factored, you must leave room for (and set)
3842            the diagonal entry even if it is zero.
3843 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3844            submatrix (same value is used for all local rows).
3845 -  o_nnz - array containing the number of nonzeros in the various rows of the
3846            OFF-DIAGONAL portion of the local submatrix (possibly different for
3847            each row) or NULL, if o_nz is used to specify the nonzero
3848            structure. The size of this array is equal to the number
3849            of local rows, i.e 'm'.
3850 
3851    If the *_nnz parameter is given then the *_nz parameter is ignored
3852 
3853    The AIJ format (also called the Yale sparse matrix format or
3854    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3855    storage.  The stored row and column indices begin with zero.
3856    See Users-Manual: ch_mat for details.
3857 
3858    The parallel matrix is partitioned such that the first m0 rows belong to
3859    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3860    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3861 
3862    The DIAGONAL portion of the local submatrix of a processor can be defined
3863    as the submatrix obtained by extracting the part corresponding to
3864    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3865    first row that belongs to the processor, r2 is the last row belonging to
3866    this processor, and c1-c2 is the range of indices of the local part of a
3867    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3868    common case of a square matrix, the row and column ranges are the same and
3869    the DIAGONAL part is also square. The remaining portion of the local
3870    submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3871 
3872    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3873 
3874    You can call MatGetInfo() to get information on how effective the preallocation was;
3875    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3876    You can also run with the option -info and look for messages with the string
3877    malloc in them to see if additional memory allocation was needed.
3878 
3879    Example usage:
3880 
3881    Consider the following 8x8 matrix with 34 non-zero values that is
3882    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3883    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3884    as follows:
3885 
3886 .vb
3887             1  2  0  |  0  3  0  |  0  4
3888     Proc0   0  5  6  |  7  0  0  |  8  0
3889             9  0 10  | 11  0  0  | 12  0
3890     -------------------------------------
3891            13  0 14  | 15 16 17  |  0  0
3892     Proc1   0 18  0  | 19 20 21  |  0  0
3893             0  0  0  | 22 23  0  | 24  0
3894     -------------------------------------
3895     Proc2  25 26 27  |  0  0 28  | 29  0
3896            30  0  0  | 31 32 33  |  0 34
3897 .ve
3898 
3899    This can be represented as a collection of submatrices as:
3900 
3901 .vb
3902       A B C
3903       D E F
3904       G H I
3905 .ve
3906 
3907    Where the submatrices A,B,C are owned by proc0, D,E,F are
3908    owned by proc1, G,H,I are owned by proc2.
3909 
3910    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3911    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3912    The 'M','N' parameters are 8,8, and have the same values on all procs.
3913 
3914    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3915    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3916    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3917    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3918    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3919    matrix, and [DF] as another SeqAIJ matrix.
3920 
3921    When d_nz, o_nz parameters are specified, d_nz storage elements are
3922    allocated for every row of the local diagonal submatrix, and o_nz
3923    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3924    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3925    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3926    In this case, the values of d_nz,o_nz are:
3927 .vb
3928      proc0 : dnz = 2, o_nz = 2
3929      proc1 : dnz = 3, o_nz = 2
3930      proc2 : dnz = 1, o_nz = 4
3931 .ve
3932    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3933    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3934    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3935    34 values.
3936 
3937    When d_nnz, o_nnz parameters are specified, the storage is specified
3938    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3939    In the above case the values for d_nnz,o_nnz are:
3940 .vb
3941      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3942      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3943      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3944 .ve
3945    Here the space allocated is the sum of all the above values, i.e., 34, and
3946    hence pre-allocation is perfect.
3947 
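   For instance, a minimal sketch of the calls proc0 from the example above might
   make using the d_nnz/o_nnz arrays (illustrative names, error checking omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
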
3948    Level: intermediate
3949 
3950 .keywords: matrix, aij, compressed row, sparse, parallel
3951 
3952 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3953           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3954 @*/
3955 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3956 {
3957   PetscErrorCode ierr;
3958 
3959   PetscFunctionBegin;
3960   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3961   PetscValidType(B,1);
3962   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
3966 #undef __FUNCT__
3967 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3968 /*@
3969      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3970          CSR format the local rows.
3971 
3972    Collective on MPI_Comm
3973 
3974    Input Parameters:
3975 +  comm - MPI communicator
3976 .  m - number of local rows (Cannot be PETSC_DECIDE)
3977 .  n - This value should be the same as the local size used in creating the
3978        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3979        calculated if N is given). For square matrices n is almost always m.
3980 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3981 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3982 .   i - row indices
3983 .   j - column indices
3984 -   a - matrix values
3985 
3986    Output Parameter:
3987 .   mat - the matrix
3988 
3989    Level: intermediate
3990 
3991    Notes:
3992        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3993      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3994      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3995 
3996        The i and j indices are 0 based, and the i array contains the offsets into the local j (and a) arrays.
3997 
3998        The format used for the sparse matrix input is equivalent to a
3999     row-major ordering, i.e., for the following matrix, the input data expected is
4000     as shown:
4001 
4002         1 0 0
4003         2 0 3     P0
4004        -------
4005         4 5 6     P1
4006 
4007      Process0 [P0]: rows_owned=[0,1]
4008         i =  {0,1,3}  [size = nrow+1  = 2+1]
4009         j =  {0,0,2}  [size = nz = 3]
4010         v =  {1,2,3}  [size = nz = 3]
4011 
4012      Process1 [P1]: rows_owned=[2]
4013         i =  {0,3}    [size = nrow+1  = 1+1]
4014         j =  {0,1,2}  [size = nz = 3]
4015         v =  {4,5,6}  [size = nz = 3]
4016 
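     A minimal sketch of the call process 0 (P0) of the example above might make
     (illustrative variable names, error checking omitted):

.vb
     Mat         mat;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&mat);
.ve
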
4017 .keywords: matrix, aij, compressed row, sparse, parallel
4018 
4019 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4020           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4021 @*/
4022 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4023 {
4024   PetscErrorCode ierr;
4025 
4026   PetscFunctionBegin;
4027   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4028   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4029   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4030   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4031   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4032   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4033   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4034   PetscFunctionReturn(0);
4035 }
4036 
4037 #undef __FUNCT__
4038 #define __FUNCT__ "MatCreateAIJ"
4039 /*@C
4040    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4041    (the default parallel PETSc format).  For good matrix assembly performance
4042    the user should preallocate the matrix storage by setting the parameters
4043    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4044    performance can be increased by more than a factor of 50.
4045 
4046    Collective on MPI_Comm
4047 
4048    Input Parameters:
4049 +  comm - MPI communicator
4050 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4051            This value should be the same as the local size used in creating the
4052            y vector for the matrix-vector product y = Ax.
4053 .  n - This value should be the same as the local size used in creating the
4054        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4055        calculated if N is given). For square matrices n is almost always m.
4056 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4057 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4058 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4059            (same value is used for all local rows)
4060 .  d_nnz - array containing the number of nonzeros in the various rows of the
4061            DIAGONAL portion of the local submatrix (possibly different for each row)
4062            or NULL, if d_nz is used to specify the nonzero structure.
4063            The size of this array is equal to the number of local rows, i.e 'm'.
4064 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4065            submatrix (same value is used for all local rows).
4066 -  o_nnz - array containing the number of nonzeros in the various rows of the
4067            OFF-DIAGONAL portion of the local submatrix (possibly different for
4068            each row) or NULL, if o_nz is used to specify the nonzero
4069            structure. The size of this array is equal to the number
4070            of local rows, i.e 'm'.
4071 
4072    Output Parameter:
4073 .  A - the matrix
4074 
4075    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4076    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4077    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4078 
4079    Notes:
4080    If the *_nnz parameter is given then the *_nz parameter is ignored
4081 
4082    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4083    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4084    storage requirements for this matrix.
4085 
4086    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4087    processor then it must be used on all processors that share the object for
4088    that argument.
4089 
4090    The user MUST specify either the local or global matrix dimensions
4091    (possibly both).
4092 
4093    The parallel matrix is partitioned across processors such that the
4094    first m0 rows belong to process 0, the next m1 rows belong to
4095    process 1, the next m2 rows belong to process 2, etc., where
4096    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4097    values corresponding to an [m x N] submatrix.
4098 
4099    The columns are logically partitioned with the n0 columns belonging
4100    to the 0th partition, the next n1 columns belonging to the next
4101    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4102 
4103    The DIAGONAL portion of the local submatrix on any given processor
4104    is the submatrix corresponding to the rows and columns m,n owned by
4105    the given processor, i.e., the diagonal matrix on
4106    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4107    etc. The remaining portion of the local submatrix [m x (N-n)]
4108    constitutes the OFF-DIAGONAL portion. The example below better
4109    illustrates this concept.
4110 
4111    For a square global matrix we define each processor's diagonal portion
4112    to be its local rows and the corresponding columns (a square submatrix);
4113    each processor's off-diagonal portion encompasses the remainder of the
4114    local matrix (a rectangular submatrix).
4115 
4116    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4117 
4118    When calling this routine with a single process communicator, a matrix of
4119    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4120    type of communicator, use the construction mechanism:
4121      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4122 
4123    By default, this format uses inodes (identical nodes) when possible.
4124    We search for consecutive rows with the same nonzero structure, thereby
4125    reusing matrix information to achieve increased efficiency.
4126 
4127    Options Database Keys:
4128 +  -mat_no_inode  - Do not use inodes
4129 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4130 -  -mat_aij_oneindex - Internally use indexing starting at 1
4131         rather than 0.  Note that when calling MatSetValues(),
4132         the user still MUST index entries starting at 0!
4133 
4134 
4135    Example usage:
4136 
4137    Consider the following 8x8 matrix with 34 non-zero values that is
4138    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4139    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4140    as follows:
4141 
4142 .vb
4143             1  2  0  |  0  3  0  |  0  4
4144     Proc0   0  5  6  |  7  0  0  |  8  0
4145             9  0 10  | 11  0  0  | 12  0
4146     -------------------------------------
4147            13  0 14  | 15 16 17  |  0  0
4148     Proc1   0 18  0  | 19 20 21  |  0  0
4149             0  0  0  | 22 23  0  | 24  0
4150     -------------------------------------
4151     Proc2  25 26 27  |  0  0 28  | 29  0
4152            30  0  0  | 31 32 33  |  0 34
4153 .ve
4154 
4155    This can be represented as a collection of submatrices as:
4156 
4157 .vb
4158       A B C
4159       D E F
4160       G H I
4161 .ve
4162 
4163    Where the submatrices A,B,C are owned by proc0, D,E,F are
4164    owned by proc1, G,H,I are owned by proc2.
4165 
4166    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4167    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4168    The 'M','N' parameters are 8,8, and have the same values on all procs.
4169 
4170    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4171    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4172    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4173    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4174    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4175    matrix, and [DF] as another SeqAIJ matrix.
4176 
4177    When d_nz, o_nz parameters are specified, d_nz storage elements are
4178    allocated for every row of the local diagonal submatrix, and o_nz
4179    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4180    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4181    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4182    In this case, the values of d_nz,o_nz are:
4183 .vb
4184      proc0 : dnz = 2, o_nz = 2
4185      proc1 : dnz = 3, o_nz = 2
4186      proc2 : dnz = 1, o_nz = 4
4187 .ve
4188    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4189    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4190    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4191    34 values.
4192 
4193    When d_nnz, o_nnz parameters are specified, the storage is specified
4194    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4195    In the above case the values for d_nnz,o_nnz are:
4196 .vb
4197      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4198      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4199      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4200 .ve
4201    Here the space allocated is the sum of all the above values, i.e., 34, and
4202    hence pre-allocation is perfect.
4203 
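   Putting this together, a minimal sketch of the call proc0 from the example above
   might make (illustrative names; error checking, MatSetValues(), and assembly omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
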
4204    Level: intermediate
4205 
4206 .keywords: matrix, aij, compressed row, sparse, parallel
4207 
4208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4209           MPIAIJ, MatCreateMPIAIJWithArrays()
4210 @*/
4211 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4212 {
4213   PetscErrorCode ierr;
4214   PetscMPIInt    size;
4215 
4216   PetscFunctionBegin;
4217   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4218   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4219   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4220   if (size > 1) {
4221     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4222     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4223   } else {
4224     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4225     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4226   }
4227   PetscFunctionReturn(0);
4228 }
4229 
4230 #undef __FUNCT__
4231 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4232 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4233 {
4234   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4235 
4236   PetscFunctionBegin;
4237   *Ad     = a->A;
4238   *Ao     = a->B;
4239   *colmap = a->garray;
4240   PetscFunctionReturn(0);
4241 }
4242 
4243 #undef __FUNCT__
4244 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4245 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4246 {
4247   PetscErrorCode ierr;
4248   PetscInt       i;
4249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4250 
4251   PetscFunctionBegin;
4252   if (coloring->ctype == IS_COLORING_GLOBAL) {
4253     ISColoringValue *allcolors,*colors;
4254     ISColoring      ocoloring;
4255 
4256     /* set coloring for diagonal portion */
4257     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4258 
4259     /* set coloring for off-diagonal portion */
4260     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4261     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4262     for (i=0; i<a->B->cmap->n; i++) {
4263       colors[i] = allcolors[a->garray[i]];
4264     }
4265     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4266     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4267     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4268     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4269   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4270     ISColoringValue *colors;
4271     PetscInt        *larray;
4272     ISColoring      ocoloring;
4273 
4274     /* set coloring for diagonal portion */
4275     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4276     for (i=0; i<a->A->cmap->n; i++) {
4277       larray[i] = i + A->cmap->rstart;
4278     }
4279     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4280     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4281     for (i=0; i<a->A->cmap->n; i++) {
4282       colors[i] = coloring->colors[larray[i]];
4283     }
4284     ierr = PetscFree(larray);CHKERRQ(ierr);
4285     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4286     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4287     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4288 
4289     /* set coloring for off-diagonal portion */
4290     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4291     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4292     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4293     for (i=0; i<a->B->cmap->n; i++) {
4294       colors[i] = coloring->colors[larray[i]];
4295     }
4296     ierr = PetscFree(larray);CHKERRQ(ierr);
4297     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4298     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4299     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4300   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4301   PetscFunctionReturn(0);
4302 }
4303 
4304 #undef __FUNCT__
4305 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4306 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4307 {
4308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4309   PetscErrorCode ierr;
4310 
4311   PetscFunctionBegin;
4312   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4313   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4319 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4320 {
4321   PetscErrorCode ierr;
4322   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4323   PetscInt       *indx;
4324 
4325   PetscFunctionBegin;
4326   /* This routine will ONLY return MPIAIJ type matrix */
4327   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4328   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4329   if (n == PETSC_DECIDE) {
4330     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4331   }
4332   /* Check sum(n) = N */
4333   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4334   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4335 
4336   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4337   rstart -= m;
4338 
4339   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4340   for (i=0; i<m; i++) {
4341     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4342     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4343     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4344   }
4345 
4346   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4347   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4348   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4349   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4350   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4351   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 #undef __FUNCT__
4356 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4357 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4358 {
4359   PetscErrorCode ierr;
4360   PetscInt       m,N,i,rstart,nnz,Ii;
4361   PetscInt       *indx;
4362   PetscScalar    *values;
4363 
4364   PetscFunctionBegin;
4365   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4366   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4367   for (i=0; i<m; i++) {
4368     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4369     Ii   = i + rstart;
4370     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4371     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4372   }
4373   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4374   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4375   PetscFunctionReturn(0);
4376 }
4377 
4378 #undef __FUNCT__
4379 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4380 /*@
4381       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4382                  matrices from each processor
4383 
4384     Collective on MPI_Comm
4385 
4386    Input Parameters:
4387 +    comm - the communicator the parallel matrix will live on
4388 .    inmat - the input sequential matrix (one per process)
4389 .    n - number of local columns (or PETSC_DECIDE)
4390 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4391 
4392    Output Parameter:
4393 .    outmat - the parallel matrix generated
4394 
4395     Level: advanced
4396 
4397    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4398 
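   A minimal usage sketch (error checking omitted; inmat is assumed to be an
   assembled SeqAIJ matrix on each process):

.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
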
4399 @*/
4400 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4401 {
4402   PetscErrorCode ierr;
4403   PetscMPIInt    size;
4404 
4405   PetscFunctionBegin;
4406   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4407   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4408   if (size == 1) {
4409     if (scall == MAT_INITIAL_MATRIX) {
4410       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4411     } else {
4412       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4413     }
4414   } else {
4415     if (scall == MAT_INITIAL_MATRIX) {
4416       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4417     }
4418     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4419   }
4420   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4421   PetscFunctionReturn(0);
4422 }
4423 
4424 #undef __FUNCT__
4425 #define __FUNCT__ "MatFileSplit"
4426 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4427 {
4428   PetscErrorCode    ierr;
4429   PetscMPIInt       rank;
4430   PetscInt          m,N,i,rstart,nnz;
4431   size_t            len;
4432   const PetscInt    *indx;
4433   PetscViewer       out;
4434   char              *name;
4435   Mat               B;
4436   const PetscScalar *values;
4437 
4438   PetscFunctionBegin;
4439   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4440   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4441   /* Should this be the type of the diagonal block of A? */
4442   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4443   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4444   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4445   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4446   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4447   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4448   for (i=0; i<m; i++) {
4449     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4450     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4451     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4452   }
4453   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4454   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4455 
4456   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4457   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4458   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4459   sprintf(name,"%s.%d",outfile,rank);
4460   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4461   ierr = PetscFree(name);CHKERRQ(ierr);
4462   ierr = MatView(B,out);CHKERRQ(ierr);
4463   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4464   ierr = MatDestroy(&B);CHKERRQ(ierr);
4465   PetscFunctionReturn(0);
4466 }
4467 
4468 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4469 #undef __FUNCT__
4470 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4471 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4472 {
4473   PetscErrorCode      ierr;
4474   Mat_Merge_SeqsToMPI *merge;
4475   PetscContainer      container;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4479   if (container) {
4480     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4493     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4494     ierr = PetscFree(merge);CHKERRQ(ierr);
4495     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4496   }
4497   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4498   PetscFunctionReturn(0);
4499 }
4500 
4501 #include <../src/mat/utils/freespace.h>
4502 #include <petscbt.h>
4503 
4504 #undef __FUNCT__
4505 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4506 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4507 {
4508   PetscErrorCode      ierr;
4509   MPI_Comm            comm;
4510   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4511   PetscMPIInt         size,rank,taga,*len_s;
4512   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4513   PetscInt            proc,m;
4514   PetscInt            **buf_ri,**buf_rj;
4515   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4516   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4517   MPI_Request         *s_waits,*r_waits;
4518   MPI_Status          *status;
4519   MatScalar           *aa=a->a;
4520   MatScalar           **abuf_r,*ba_i;
4521   Mat_Merge_SeqsToMPI *merge;
4522   PetscContainer      container;
4523 
4524   PetscFunctionBegin;
4525   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4526   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4527 
4528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4529   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4530 
4531   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4532   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4533 
4534   bi     = merge->bi;
4535   bj     = merge->bj;
4536   buf_ri = merge->buf_ri;
4537   buf_rj = merge->buf_rj;
4538 
4539   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4540   owners = merge->rowmap->range;
4541   len_s  = merge->len_s;
4542 
4543   /* send and recv matrix values */
4544   /*-----------------------------*/
4545   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4546   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4547 
4548   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4549   for (proc=0,k=0; proc<size; proc++) {
4550     if (!len_s[proc]) continue;
4551     i    = owners[proc];
4552     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4553     k++;
4554   }
4555 
4556   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4557   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4558   ierr = PetscFree(status);CHKERRQ(ierr);
4559 
4560   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4561   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4562 
4563   /* insert mat values of mpimat */
4564   /*----------------------------*/
4565   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4566   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4567 
4568   for (k=0; k<merge->nrecv; k++) {
4569     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4570     nrows       = *(buf_ri_k[k]);
4571     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4572     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4573   }
4574 
4575   /* set values of ba */
4576   m = merge->rowmap->n;
4577   for (i=0; i<m; i++) {
4578     arow = owners[rank] + i;
4579     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4580     bnzi = bi[i+1] - bi[i];
4581     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4582 
4583     /* add local non-zero vals of this proc's seqmat into ba */
4584     anzi   = ai[arow+1] - ai[arow];
4585     aj     = a->j + ai[arow];
4586     aa     = a->a + ai[arow];
4587     nextaj = 0;
4588     for (j=0; nextaj<anzi; j++) {
4589       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4590         ba_i[j] += aa[nextaj++];
4591       }
4592     }
4593 
4594     /* add received vals into ba */
4595     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4596       /* i-th row */
4597       if (i == *nextrow[k]) {
4598         anzi   = *(nextai[k]+1) - *nextai[k];
4599         aj     = buf_rj[k] + *(nextai[k]);
4600         aa     = abuf_r[k] + *(nextai[k]);
4601         nextaj = 0;
4602         for (j=0; nextaj<anzi; j++) {
4603           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4604             ba_i[j] += aa[nextaj++];
4605           }
4606         }
4607         nextrow[k]++; nextai[k]++;
4608       }
4609     }
4610     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4611   }
4612   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4613   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4614 
4615   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4616   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4617   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4618   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4619   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4620   PetscFunctionReturn(0);
4621 }
4622 
4623 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4624 
4625 #undef __FUNCT__
4626 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4627 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4628 {
4629   PetscErrorCode      ierr;
4630   Mat                 B_mpi;
4631   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4632   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4633   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4634   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4635   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4636   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4637   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4638   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4639   MPI_Status          *status;
4640   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4641   PetscBT             lnkbt;
4642   Mat_Merge_SeqsToMPI *merge;
4643   PetscContainer      container;
4644 
4645   PetscFunctionBegin;
4646   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4647 
4648   /* make sure it is a PETSc comm */
4649   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4650   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4651   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4652 
4653   ierr = PetscNew(&merge);CHKERRQ(ierr);
4654   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4655 
4656   /* determine row ownership */
4657   /*---------------------------------------------------------*/
4658   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4659   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4660   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4661   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4662   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4663   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4664   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4665 
4666   m      = merge->rowmap->n;
4667   owners = merge->rowmap->range;
4668 
4669   /* determine the number of messages to send, their lengths */
4670   /*---------------------------------------------------------*/
4671   len_s = merge->len_s;
4672 
4673   len          = 0; /* length of buf_si[] */
4674   merge->nsend = 0;
4675   for (proc=0; proc<size; proc++) {
4676     len_si[proc] = 0;
4677     if (proc == rank) {
4678       len_s[proc] = 0;
4679     } else {
4680       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4681       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4682     }
4683     if (len_s[proc]) {
4684       merge->nsend++;
4685       nrows = 0;
4686       for (i=owners[proc]; i<owners[proc+1]; i++) {
4687         if (ai[i+1] > ai[i]) nrows++;
4688       }
4689       len_si[proc] = 2*(nrows+1);
4690       len         += len_si[proc];
4691     }
4692   }
4693 
4694   /* determine the number and length of messages to receive for ij-structure */
4695   /*-------------------------------------------------------------------------*/
4696   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4697   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4698 
4699   /* post the Irecv of j-structure */
4700   /*-------------------------------*/
4701   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4702   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4703 
4704   /* post the Isend of j-structure */
4705   /*--------------------------------*/
4706   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4707 
4708   for (proc=0, k=0; proc<size; proc++) {
4709     if (!len_s[proc]) continue;
4710     i    = owners[proc];
4711     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4712     k++;
4713   }
4714 
4715   /* receives and sends of j-structure are complete */
4716   /*------------------------------------------------*/
4717   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4718   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4719 
4720   /* send and recv i-structure */
4721   /*---------------------------*/
4722   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4723   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4724 
4725   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4726   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4727   for (proc=0,k=0; proc<size; proc++) {
4728     if (!len_s[proc]) continue;
4729     /* form outgoing message for i-structure:
4730          buf_si[0]:                 nrows to be sent
4731                [1:nrows]:           row index (global)
4732                [nrows+1:2*nrows+1]: i-structure index
4733     */
4734     /*-------------------------------------------*/
4735     nrows       = len_si[proc]/2 - 1;
4736     buf_si_i    = buf_si + nrows+1;
4737     buf_si[0]   = nrows;
4738     buf_si_i[0] = 0;
4739     nrows       = 0;
4740     for (i=owners[proc]; i<owners[proc+1]; i++) {
4741       anzi = ai[i+1] - ai[i];
4742       if (anzi) {
4743         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4744         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4745         nrows++;
4746       }
4747     }
4748     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4749     k++;
4750     buf_si += len_si[proc];
4751   }
4752 
4753   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4754   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4755 
4756   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4757   for (i=0; i<merge->nrecv; i++) {
4758     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4759   }
4760 
4761   ierr = PetscFree(len_si);CHKERRQ(ierr);
4762   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4763   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4764   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4765   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4766   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4767   ierr = PetscFree(status);CHKERRQ(ierr);
4768 
4769   /* compute a local seq matrix in each processor */
4770   /*----------------------------------------------*/
4771   /* allocate bi array and free space for accumulating nonzero column info */
4772   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4773   bi[0] = 0;
4774 
4775   /* create and initialize a linked list */
4776   nlnk = N+1;
4777   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4778 
4779   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4780   len  = ai[owners[rank+1]] - ai[owners[rank]];
4781   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4782 
4783   current_space = free_space;
4784 
4785   /* determine symbolic info for each local row */
4786   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4787 
4788   for (k=0; k<merge->nrecv; k++) {
4789     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4790     nrows       = *buf_ri_k[k];
4791     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4792     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4793   }
4794 
4795   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4796   len  = 0;
4797   for (i=0; i<m; i++) {
4798     bnzi = 0;
4799     /* add local non-zero cols of this proc's seqmat into lnk */
4800     arow  = owners[rank] + i;
4801     anzi  = ai[arow+1] - ai[arow];
4802     aj    = a->j + ai[arow];
4803     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4804     bnzi += nlnk;
4805     /* add received col data into lnk */
4806     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4807       if (i == *nextrow[k]) { /* i-th row */
4808         anzi  = *(nextai[k]+1) - *nextai[k];
4809         aj    = buf_rj[k] + *nextai[k];
4810         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4811         bnzi += nlnk;
4812         nextrow[k]++; nextai[k]++;
4813       }
4814     }
4815     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4816 
4817     /* if free space is not available, make more free space */
4818     if (current_space->local_remaining<bnzi) {
4819       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4820       nspacedouble++;
4821     }
4822     /* copy data into free space, then initialize lnk */
4823     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4824     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4825 
4826     current_space->array           += bnzi;
4827     current_space->local_used      += bnzi;
4828     current_space->local_remaining -= bnzi;
4829 
4830     bi[i+1] = bi[i] + bnzi;
4831   }
4832 
4833   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4834 
4835   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4836   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4837   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4838 
4839   /* create symbolic parallel matrix B_mpi */
4840   /*---------------------------------------*/
4841   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4842   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4843   if (n==PETSC_DECIDE) {
4844     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4845   } else {
4846     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4847   }
4848   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4849   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4850   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4851   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4852   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4853 
4854   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4855   B_mpi->assembled    = PETSC_FALSE;
4856   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4857   merge->bi           = bi;
4858   merge->bj           = bj;
4859   merge->buf_ri       = buf_ri;
4860   merge->buf_rj       = buf_rj;
4861   merge->coi          = NULL;
4862   merge->coj          = NULL;
4863   merge->owners_co    = NULL;
4864 
4865   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4866 
4867   /* attach the supporting struct to B_mpi for reuse */
4868   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4869   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4870   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4871   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4872   *mpimat = B_mpi;
4873 
4874   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4875   PetscFunctionReturn(0);
4876 }
4877 
4878 #undef __FUNCT__
4879 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4880 /*@C
4881       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4882                  matrices from each processor
4883 
4884     Collective on MPI_Comm
4885 
4886    Input Parameters:
4887 +    comm - the communicator the parallel matrix will live on
4888 .    seqmat - the input sequential matrix (one per process)
4889 .    m - number of local rows (or PETSC_DECIDE)
4890 .    n - number of local columns (or PETSC_DECIDE)
4891 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4892 
4893    Output Parameter:
4894 .    mpimat - the parallel matrix generated
4895 
4896     Level: advanced
4897 
4898    Notes:
4899      The dimensions of the sequential matrix on each processor MUST be the same.
4900      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4901      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
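
     A minimal usage sketch (error checking omitted; seqmat is assumed to be an
     assembled SeqAIJ matrix of the same dimensions on every process):

.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve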
4902 @*/
4903 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4904 {
4905   PetscErrorCode ierr;
4906   PetscMPIInt    size;
4907 
4908   PetscFunctionBegin;
4909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4910   if (size == 1) {
4911     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4912     if (scall == MAT_INITIAL_MATRIX) {
4913       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4914     } else {
4915       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4916     }
4917     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4918     PetscFunctionReturn(0);
4919   }
4920   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921   if (scall == MAT_INITIAL_MATRIX) {
4922     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4923   }
4924   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4925   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926   PetscFunctionReturn(0);
4927 }
4928 
4929 #undef __FUNCT__
4930 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4931 /*@
4932      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4933           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4934           with MatGetSize().
4935 
4936     Not Collective
4937 
4938    Input Parameters:
4939 +    A - the matrix
4940 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4941 
4942    Output Parameter:
4943 .    A_loc - the local sequential matrix generated
4944 
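    A minimal usage sketch (error checking omitted):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
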
4945     Level: developer
4946 
4947 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4948 
4949 @*/
4950 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4951 {
4952   PetscErrorCode ierr;
4953   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4954   Mat_SeqAIJ     *mat,*a,*b;
4955   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4956   MatScalar      *aa,*ba,*cam;
4957   PetscScalar    *ca;
4958   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4959   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4960   PetscBool      match;
4961 
4962   PetscFunctionBegin;
4963   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4964   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4965   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4966   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4967   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4968   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4969   aa = a->a; ba = b->a;
4970   if (scall == MAT_INITIAL_MATRIX) {
4971     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4972     ci[0] = 0;
4973     for (i=0; i<am; i++) {
4974       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4975     }
4976     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4977     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4978     k    = 0;
4979     for (i=0; i<am; i++) {
4980       ncols_o = bi[i+1] - bi[i];
4981       ncols_d = ai[i+1] - ai[i];
4982       /* off-diagonal portion of A */
4983       for (jo=0; jo<ncols_o; jo++) {
4984         col = cmap[*bj];
4985         if (col >= cstart) break;
4986         cj[k]   = col; bj++;
4987         ca[k++] = *ba++;
4988       }
4989       /* diagonal portion of A */
4990       for (j=0; j<ncols_d; j++) {
4991         cj[k]   = cstart + *aj++;
4992         ca[k++] = *aa++;
4993       }
4994       /* off-diagonal portion of A */
4995       for (j=jo; j<ncols_o; j++) {
4996         cj[k]   = cmap[*bj++];
4997         ca[k++] = *ba++;
4998       }
4999     }
5000     /* put together the new matrix */
5001     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5002     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5003     /* Since these are PETSc arrays, change flags to free them as necessary. */
5004     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5005     mat->free_a  = PETSC_TRUE;
5006     mat->free_ij = PETSC_TRUE;
5007     mat->nonew   = 0;
5008   } else if (scall == MAT_REUSE_MATRIX) {
5009     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5010     ci = mat->i; cj = mat->j; cam = mat->a;
5011     for (i=0; i<am; i++) {
5012       /* off-diagonal portion of A */
5013       ncols_o = bi[i+1] - bi[i];
5014       for (jo=0; jo<ncols_o; jo++) {
5015         col = cmap[*bj];
5016         if (col >= cstart) break;
5017         *cam++ = *ba++; bj++;
5018       }
5019       /* diagonal portion of A */
5020       ncols_d = ai[i+1] - ai[i];
5021       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5022       /* off-diagonal portion of A */
5023       for (j=jo; j<ncols_o; j++) {
5024         *cam++ = *ba++; bj++;
5025       }
5026     }
5027   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5028   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5029   PetscFunctionReturn(0);
5030 }
5031 
5032 #undef __FUNCT__
5033 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5034 /*@C
5035      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5036 
5037     Not Collective
5038 
5039    Input Parameters:
5040 +    A - the matrix
5041 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5042 -    row, col - index sets of rows and columns to extract (or NULL)
5043 
5044    Output Parameter:
5045 .    A_loc - the local sequential matrix generated
5046 
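    A minimal usage sketch extracting all local rows and all locally nonzero
    columns (error checking omitted; NULL selects the default row and column sets):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
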
5047     Level: developer
5048 
5049 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5050 
5051 @*/
5052 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5053 {
5054   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5055   PetscErrorCode ierr;
5056   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5057   IS             isrowa,iscola;
5058   Mat            *aloc;
5059   PetscBool      match;
5060 
5061   PetscFunctionBegin;
5062   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5063   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5064   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5065   if (!row) {
5066     start = A->rmap->rstart; end = A->rmap->rend;
5067     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5068   } else {
5069     isrowa = *row;
5070   }
5071   if (!col) {
5072     start = A->cmap->rstart;
5073     cmap  = a->garray;
5074     nzA   = a->A->cmap->n;
5075     nzB   = a->B->cmap->n;
5076     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5077     ncols = 0;
5078     for (i=0; i<nzB; i++) {
5079       if (cmap[i] < start) idx[ncols++] = cmap[i];
5080       else break;
5081     }
5082     imark = i;
5083     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5084     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5085     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5086   } else {
5087     iscola = *col;
5088   }
5089   if (scall != MAT_INITIAL_MATRIX) {
5090     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5091     aloc[0] = *A_loc;
5092   }
5093   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5094   *A_loc = aloc[0];
5095   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5096   if (!row) {
5097     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5098   }
5099   if (!col) {
5100     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5101   }
5102   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5103   PetscFunctionReturn(0);
5104 }
5105 
5106 #undef __FUNCT__
5107 #define __FUNCT__ "MatGetBrowsOfAcols"
5108 /*@C
5109     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5110 
5111     Collective on Mat
5112 
5113    Input Parameters:
5114 +    A,B - the matrices in mpiaij format
5115 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5116 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5117 
5118    Output Parameter:
5119 +    rowb, colb - index sets of rows and columns of B to extract
5120 -    B_seq - the sequential matrix generated
5121 
5122     Level: developer
5123 
5124 @*/
5125 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5126 {
5127   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5128   PetscErrorCode ierr;
5129   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5130   IS             isrowb,iscolb;
5131   Mat            *bseq=NULL;
5132 
5133   PetscFunctionBegin;
5134   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5135     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5136   }
5137   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5138 
5139   if (scall == MAT_INITIAL_MATRIX) {
5140     start = A->cmap->rstart;
5141     cmap  = a->garray;
5142     nzA   = a->A->cmap->n;
5143     nzB   = a->B->cmap->n;
5144     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5145     ncols = 0;
5146     for (i=0; i<nzB; i++) {  /* row < local row index */
5147       if (cmap[i] < start) idx[ncols++] = cmap[i];
5148       else break;
5149     }
5150     imark = i;
5151     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5152     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5153     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5154     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5155   } else {
5156     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5157     isrowb  = *rowb; iscolb = *colb;
5158     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5159     bseq[0] = *B_seq;
5160   }
5161   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5162   *B_seq = bseq[0];
5163   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5164   if (!rowb) {
5165     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5166   } else {
5167     *rowb = isrowb;
5168   }
5169   if (!colb) {
5170     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5171   } else {
5172     *colb = iscolb;
5173   }
5174   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5175   PetscFunctionReturn(0);
5176 }
5177 
5178 #undef __FUNCT__
5179 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5180 /*
5181     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5182     of the OFF-DIAGONAL portion of local A
5183 
5184     Collective on Mat
5185 
5186    Input Parameters:
5187 +    A,B - the matrices in mpiaij format
5188 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5189 
5190    Output Parameters:
5191 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5192 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5193 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5194 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5195 
5196     Level: developer
5197 
5198 */
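/*
   A usage sketch (assumes A and B are assembled MATMPIAIJ matrices with compatible layouts; the saved
   startsj_s/startsj_r/bufa buffers belong to the caller once returned and are freed as shown):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     (change numerical values of B, keeping its nonzero structure)
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/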
5199 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5200 {
5201   VecScatter_MPI_General *gen_to,*gen_from;
5202   PetscErrorCode         ierr;
5203   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5204   Mat_SeqAIJ             *b_oth;
5205   VecScatter             ctx =a->Mvctx;
5206   MPI_Comm               comm;
5207   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5208   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5209   PetscScalar            *rvalues,*svalues;
5210   MatScalar              *b_otha,*bufa,*bufA;
5211   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5212   MPI_Request            *rwaits = NULL,*swaits = NULL;
5213   MPI_Status             *sstatus,rstatus;
5214   PetscMPIInt            jj;
5215   PetscInt               *cols,sbs,rbs;
5216   PetscScalar            *vals;
5217 
5218   PetscFunctionBegin;
5219   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5220   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5221     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5222   }
5223   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5224   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5225 
5226   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5227   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5228   rvalues  = gen_from->values; /* holds the length of receiving row */
5229   svalues  = gen_to->values;   /* holds the length of sending row */
5230   nrecvs   = gen_from->n;
5231   nsends   = gen_to->n;
5232 
5233   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5234   srow    = gen_to->indices;    /* local row index to be sent */
5235   sstarts = gen_to->starts;
5236   sprocs  = gen_to->procs;
5237   sstatus = gen_to->sstatus;
5238   sbs     = gen_to->bs;
5239   rstarts = gen_from->starts;
5240   rprocs  = gen_from->procs;
5241   rbs     = gen_from->bs;
5242 
5243   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5244   if (scall == MAT_INITIAL_MATRIX) {
5245     /* i-array */
5246     /*---------*/
5247     /*  post receives */
5248     for (i=0; i<nrecvs; i++) {
5249       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5250       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5251       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5252     }
5253 
5254     /* pack the outgoing message */
5255     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5256 
5257     sstartsj[0] = 0;
5258     rstartsj[0] = 0;
5259     len         = 0; /* total length of j or a array to be sent */
5260     k           = 0;
5261     for (i=0; i<nsends; i++) {
5262       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5263       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5264       for (j=0; j<nrows; j++) {
5265         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5266         for (l=0; l<sbs; l++) {
5267           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5268 
5269           rowlen[j*sbs+l] = ncols;
5270 
5271           len += ncols;
5272           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5273         }
5274         k++;
5275       }
5276       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5277 
5278       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5279     }
5280     /* recvs and sends of i-array are completed */
5281     i = nrecvs;
5282     while (i--) {
5283       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5284     }
5285     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5286 
5287     /* allocate buffers for sending j and a arrays */
5288     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5289     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5290 
5291     /* create i-array of B_oth */
5292     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5293 
5294     b_othi[0] = 0;
5295     len       = 0; /* total length of j or a array to be received */
5296     k         = 0;
5297     for (i=0; i<nrecvs; i++) {
5298       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5299       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5300       for (j=0; j<nrows; j++) {
5301         b_othi[k+1] = b_othi[k] + rowlen[j];
5302         len        += rowlen[j]; k++;
5303       }
5304       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5305     }
5306 
5307     /* allocate space for j and a arrays of B_oth */
5308     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5309     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5310 
5311     /* j-array */
5312     /*---------*/
5313     /*  post receives of j-array */
5314     for (i=0; i<nrecvs; i++) {
5315       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5316       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5317     }
5318 
5319     /* pack the outgoing message j-array */
5320     k = 0;
5321     for (i=0; i<nsends; i++) {
5322       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5323       bufJ  = bufj+sstartsj[i];
5324       for (j=0; j<nrows; j++) {
5325         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5326         for (ll=0; ll<sbs; ll++) {
5327           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5328           for (l=0; l<ncols; l++) {
5329             *bufJ++ = cols[l];
5330           }
5331           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5332         }
5333       }
5334       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5335     }
5336 
5337     /* recvs and sends of j-array are completed */
5338     i = nrecvs;
5339     while (i--) {
5340       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5341     }
5342     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5343   } else if (scall == MAT_REUSE_MATRIX) {
5344     sstartsj = *startsj_s;
5345     rstartsj = *startsj_r;
5346     bufa     = *bufa_ptr;
5347     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5348     b_otha   = b_oth->a;
5349   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5350 
5351   /* a-array */
5352   /*---------*/
5353   /*  post receives of a-array */
5354   for (i=0; i<nrecvs; i++) {
5355     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5356     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5357   }
5358 
5359   /* pack the outgoing message a-array */
5360   k = 0;
5361   for (i=0; i<nsends; i++) {
5362     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5363     bufA  = bufa+sstartsj[i];
5364     for (j=0; j<nrows; j++) {
5365       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5366       for (ll=0; ll<sbs; ll++) {
5367         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5368         for (l=0; l<ncols; l++) {
5369           *bufA++ = vals[l];
5370         }
5371         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5372       }
5373     }
5374     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5375   }
5376   /* recvs and sends of a-array are completed */
5377   i = nrecvs;
5378   while (i--) {
5379     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5380   }
5381   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5382   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5383 
5384   if (scall == MAT_INITIAL_MATRIX) {
5385     /* put together the new matrix */
5386     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5387 
5388     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5389     /* Since these are PETSc arrays, change flags to free them as necessary. */
5390     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5391     b_oth->free_a  = PETSC_TRUE;
5392     b_oth->free_ij = PETSC_TRUE;
5393     b_oth->nonew   = 0;
5394 
5395     ierr = PetscFree(bufj);CHKERRQ(ierr);
5396     if (!startsj_s || !bufa_ptr) {
5397       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5398       ierr = PetscFree(bufa);CHKERRQ(ierr);
5399     } else {
5400       *startsj_s = sstartsj;
5401       *startsj_r = rstartsj;
5402       *bufa_ptr  = bufa;
5403     }
5404   }
5405   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5406   PetscFunctionReturn(0);
5407 }
5408 
5409 #undef __FUNCT__
5410 #define __FUNCT__ "MatGetCommunicationStructs"
5411 /*@C
5412   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5413 
5414   Not Collective
5415 
5416   Input Parameter:
5417 . A - The matrix in mpiaij format
5418 
5419   Output Parameters:
5420 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5421 . colmap - A map from global column index to local index into lvec
5422 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5423 
5424   Level: developer
5425 
5426 @*/
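/*
   A usage sketch (assumes A is an assembled MATMPIAIJ matrix; the returned objects are owned by the
   matrix and must not be destroyed by the caller; the colmap type depends on PETSC_USE_CTABLE):

     Vec        lvec;
     VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/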
5427 #if defined(PETSC_USE_CTABLE)
5428 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5429 #else
5430 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5431 #endif
5432 {
5433   Mat_MPIAIJ *a;
5434 
5435   PetscFunctionBegin;
5436   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5437   PetscValidPointer(lvec, 2);
5438   PetscValidPointer(colmap, 3);
5439   PetscValidPointer(multScatter, 4);
5440   a = (Mat_MPIAIJ*) A->data;
5441   if (lvec) *lvec = a->lvec;
5442   if (colmap) *colmap = a->colmap;
5443   if (multScatter) *multScatter = a->Mvctx;
5444   PetscFunctionReturn(0);
5445 }
5446 
5447 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5448 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5449 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5450 
5451 #undef __FUNCT__
5452 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5453 /*
5454     Computes (B'*A')' since computing A*B directly is untenable
5455 
5456                n                       p                          p
5457         (              )       (              )         (                  )
5458       m (      A       )  *  n (       B      )   =   m (         C        )
5459         (              )       (              )         (                  )
5460 
5461 */
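/*
   A usage sketch of the dense*sparse path above (assumes A is an assembled MATMPIDENSE matrix and B an
   assembled MATMPIAIJ matrix with A->cmap->n == B->rmap->n):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/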
5462 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5463 {
5464   PetscErrorCode ierr;
5465   Mat            At,Bt,Ct;
5466 
5467   PetscFunctionBegin;
5468   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5469   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5470   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5471   ierr = MatDestroy(&At);CHKERRQ(ierr);
5472   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5473   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5474   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5475   PetscFunctionReturn(0);
5476 }
5477 
5478 #undef __FUNCT__
5479 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5480 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5481 {
5482   PetscErrorCode ierr;
5483   PetscInt       m=A->rmap->n,n=B->cmap->n;
5484   Mat            Cmat;
5485 
5486   PetscFunctionBegin;
5487   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5488   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5489   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5490   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5491   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5492   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5493   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5495 
5496   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5497 
5498   *C = Cmat;
5499   PetscFunctionReturn(0);
5500 }
5501 
5502 /* ----------------------------------------------------------------*/
5503 #undef __FUNCT__
5504 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5505 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5506 {
5507   PetscErrorCode ierr;
5508 
5509   PetscFunctionBegin;
5510   if (scall == MAT_INITIAL_MATRIX) {
5511     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5512     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5513     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5514   }
5515   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5516   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5517   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5518   PetscFunctionReturn(0);
5519 }
5520 
5521 #if defined(PETSC_HAVE_MUMPS)
5522 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_PASTIX)
5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_SUPERLU_DIST)
5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5529 #endif
5530 #if defined(PETSC_HAVE_CLIQUE)
5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5532 #endif
5533 
5534 /*MC
5535    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5536 
5537    Options Database Keys:
5538 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5539 
5540   Level: beginner
5541 
5542 .seealso: MatCreateAIJ()
5543 M*/
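/*
   A creation sketch for this type (assumes comm spans several processes; M,N are global sizes and
   d_nz,o_nz are rough per-row nonzero estimates for the diagonal and off-diagonal blocks):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/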
5544 
5545 #undef __FUNCT__
5546 #define __FUNCT__ "MatCreate_MPIAIJ"
5547 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5548 {
5549   Mat_MPIAIJ     *b;
5550   PetscErrorCode ierr;
5551   PetscMPIInt    size;
5552 
5553   PetscFunctionBegin;
5554   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5555 
5556   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5557   B->data       = (void*)b;
5558   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5559   B->assembled  = PETSC_FALSE;
5560   B->insertmode = NOT_SET_VALUES;
5561   b->size       = size;
5562 
5563   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5564 
5565   /* build cache for off array entries formed */
5566   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5567 
5568   b->donotstash  = PETSC_FALSE;
5569   b->colmap      = 0;
5570   b->garray      = 0;
5571   b->roworiented = PETSC_TRUE;
5572 
5573   /* stuff used for matrix vector multiply */
5574   b->lvec  = NULL;
5575   b->Mvctx = NULL;
5576 
5577   /* stuff for MatGetRow() */
5578   b->rowindices   = 0;
5579   b->rowvalues    = 0;
5580   b->getrowactive = PETSC_FALSE;
5581 
5582   /* flexible pointer used in CUSP/CUSPARSE classes */
5583   b->spptr = NULL;
5584 
5585 #if defined(PETSC_HAVE_MUMPS)
5586   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_PASTIX)
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5590 #endif
5591 #if defined(PETSC_HAVE_SUPERLU_DIST)
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5593 #endif
5594 #if defined(PETSC_HAVE_CLIQUE)
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5596 #endif
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5611   PetscFunctionReturn(0);
5612 }
5613 
5614 #undef __FUNCT__
5615 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5616 /*@
5617      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5618          and "off-diagonal" part of the matrix in CSR format.
5619 
5620    Collective on MPI_Comm
5621 
5622    Input Parameters:
5623 +  comm - MPI communicator
5624 .  m - number of local rows (Cannot be PETSC_DECIDE)
5625 .  n - number of local columns; this should be the same as the local size used in creating the
5626        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5627        calculated if N is given). For square matrices n is almost always m.
5628 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5629 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5630 .   i - row indices for "diagonal" portion of matrix
5631 .   j - column indices
5632 .   a - matrix values
5633 .   oi - row indices for "off-diagonal" portion of matrix
5634 .   oj - column indices
5635 -   oa - matrix values
5636 
5637    Output Parameter:
5638 .   mat - the matrix
5639 
5640    Level: advanced
5641 
5642    Notes:
5643        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5644        must free the arrays once the matrix has been destroyed and not before.
5645 
5646        The i and j indices are 0 based
5647 
5648        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5649 
5650        This sets local rows and cannot be used to set off-processor values.
5651 
5652        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5653        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5654        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5655        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5656        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5657        communication if it is known that only local entries will be set.
5658 
5659 .keywords: matrix, aij, compressed row, sparse, parallel
5660 
5661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5662           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5663 @*/
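/*
   A two-process sketch building the 2x2 matrix [2 -1; -1 2] (assumes exactly two MPI processes, each
   owning one row and one column; j[] holds local column indices of the diagonal block while oj[] holds
   global column indices; the arrays must remain valid until the matrix is destroyed):

     Mat         A;
     PetscMPIInt rank;
     PetscInt    i[2]  = {0,1},oi[2] = {0,1},j[1] = {0},oj[1];
     PetscScalar a[1]  = {2.0},oa[1] = {-1.0};

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = 1 - rank;   (the single off-diagonal entry couples to the other process's column)
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/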
5664 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5665 {
5666   PetscErrorCode ierr;
5667   Mat_MPIAIJ     *maij;
5668 
5669   PetscFunctionBegin;
5670   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5671   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5672   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5673   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5674   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5675   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5676   maij = (Mat_MPIAIJ*) (*mat)->data;
5677 
5678   (*mat)->preallocated = PETSC_TRUE;
5679 
5680   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5681   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5682 
5683   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5684   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5685 
5686   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5688   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690 
5691   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5694   PetscFunctionReturn(0);
5695 }
5696 
5697 /*
5698     Special version for direct calls from Fortran
5699 */
5700 #include <petsc-private/fortranimpl.h>
5701 
5702 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5706 #endif
5707 
5708 /* Change these macros so can be used in void function */
5709 #undef CHKERRQ
5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5711 #undef SETERRQ2
5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5713 #undef SETERRQ3
5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5715 #undef SETERRQ
5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5717 
5718 #undef __FUNCT__
5719 #define __FUNCT__ "matsetvaluesmpiaij_"
5720 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5721 {
5722   Mat            mat  = *mmat;
5723   PetscInt       m    = *mm, n = *mn;
5724   InsertMode     addv = *maddv;
5725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5726   PetscScalar    value;
5727   PetscErrorCode ierr;
5728 
5729   MatCheckPreallocated(mat,1);
5730   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5731 
5732 #if defined(PETSC_USE_DEBUG)
5733   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5734 #endif
5735   {
5736     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5737     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5738     PetscBool roworiented = aij->roworiented;
5739 
5740     /* Some Variables required in the macro */
5741     Mat        A                 = aij->A;
5742     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5743     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5744     MatScalar  *aa               = a->a;
5745     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5746     Mat        B                 = aij->B;
5747     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5748     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5749     MatScalar  *ba               = b->a;
5750 
5751     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5752     PetscInt  nonew = a->nonew;
5753     MatScalar *ap1,*ap2;
5754 
5755     PetscFunctionBegin;
5756     for (i=0; i<m; i++) {
5757       if (im[i] < 0) continue;
5758 #if defined(PETSC_USE_DEBUG)
5759       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5760 #endif
5761       if (im[i] >= rstart && im[i] < rend) {
5762         row      = im[i] - rstart;
5763         lastcol1 = -1;
5764         rp1      = aj + ai[row];
5765         ap1      = aa + ai[row];
5766         rmax1    = aimax[row];
5767         nrow1    = ailen[row];
5768         low1     = 0;
5769         high1    = nrow1;
5770         lastcol2 = -1;
5771         rp2      = bj + bi[row];
5772         ap2      = ba + bi[row];
5773         rmax2    = bimax[row];
5774         nrow2    = bilen[row];
5775         low2     = 0;
5776         high2    = nrow2;
5777 
5778         for (j=0; j<n; j++) {
5779           if (roworiented) value = v[i*n+j];
5780           else value = v[i+j*m];
5781           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5782           if (in[j] >= cstart && in[j] < cend) {
5783             col = in[j] - cstart;
5784             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5785           } else if (in[j] < 0) continue;
5786 #if defined(PETSC_USE_DEBUG)
5787           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5788 #endif
5789           else {
5790             if (mat->was_assembled) {
5791               if (!aij->colmap) {
5792                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5793               }
5794 #if defined(PETSC_USE_CTABLE)
5795               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5796               col--;
5797 #else
5798               col = aij->colmap[in[j]] - 1;
5799 #endif
5800               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5801                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5802                 col  =  in[j];
5803                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5804                 B     = aij->B;
5805                 b     = (Mat_SeqAIJ*)B->data;
5806                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5807                 rp2   = bj + bi[row];
5808                 ap2   = ba + bi[row];
5809                 rmax2 = bimax[row];
5810                 nrow2 = bilen[row];
5811                 low2  = 0;
5812                 high2 = nrow2;
5813                 bm    = aij->B->rmap->n;
5814                 ba    = b->a;
5815               }
5816             } else col = in[j];
5817             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5818           }
5819         }
5820       } else if (!aij->donotstash) {
5821         if (roworiented) {
5822           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5823         } else {
5824           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5825         }
5826       }
5827     }
5828   }
5829   PetscFunctionReturnVoid();
5830 }
5831 
5832