xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4dde04f4c835a3d74306cfa47aff66fee8d17a44)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single-process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single-process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also
20    automatically switches over to use inodes when enough consecutive rows with identical nonzero structure exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
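/*
   Editor's example (a hedged sketch, not part of the original source): the
   recommended pattern of calling both preallocation routines so the same code
   runs on any communicator size.  The names comm and n, and the per-row
   estimates (5 diagonal, 2 off-diagonal nonzeros), are placeholders.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   On a single process only the MatSeqAIJSetPreallocation() call takes effect;
   on multiple processes only the MatMPIAIJSetPreallocation() call does.
*/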
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
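  /* first pass: count the locally zero rows, i.e. rows with no stored entries or with only explicitly stored zeros */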
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
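  /* sum the per-process zero-row counts; if no process has a zero row, return *keptrows = NULL, meaning every row is kept */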
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
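  /* second pass: record the global indices of the rows being kept */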
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
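  /* work[] has global length (the number of columns): each process accumulates contributions from its own entries, then a single Allreduce (sum, or max for NORM_INFINITY) combines them into norms[] */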
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
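    /* turn the gathered local row counts into ownership ranges: after the prefix sum, rowners[p] is the first global row owned by process p */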
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal entries in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
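      /* ld[i] = number of entries of row i lying to the left of the diagonal block; saved in the matrix so the MAT_REUSE_MATRIX path below can split incoming values between the diagonal (A) and off-diagonal (B) parts */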
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal entries in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash-table lookup cost; without it, it is not scalable (each
350 process stores an order-N integer array) but lookups are fast.
351 */
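/*
   Lookup sketch (editor's note): once built, the colmap is consulted as in
   MatSetValues_MPIAIJ() below.  Here gcol is a global column index and lcol
   becomes the local index into the off-diagonal block, or -1 if absent; both
   stores keep lcol+1 so that a value of 0 means "not found".

#if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif
*/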
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
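/*
   The two macros below insert a single value into the diagonal (A) or
   off-diagonal (B) block of a row: a binary search narrows [low,high) over the
   row's sorted column indices, a short linear scan locates the column, and an
   existing entry is added to or overwritten in place; otherwise, if new
   nonzeros are allowed, the row is reallocated as needed and later entries are
   shifted up to make room for the new one.
*/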
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
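  /* v[] must hold the entire row in ascending global column order: off-diagonal entries left of the diagonal block, then the diagonal block, then the remaining off-diagonal entries */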
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
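      /* off-process row: either error out (the caller promised no off-process entries) or buffer the values in the stash, which is communicated during MatAssemblyBegin()/MatAssemblyEnd() */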
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any process has disassembled; if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscMPIInt    size   = mat->size;
748   PetscSF        sf;
749   PetscInt      *lrows;
750   PetscSFNode   *rrows;
751   PetscInt       lastidx = -1, r, p = 0, len = 0;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755   /* Create SF where leaves are input rows and roots are owned rows */
756   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
757   for (r = 0; r < n; ++r) lrows[r] = -1;
758   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
759   for (r = 0; r < N; ++r) {
760     const PetscInt idx   = rows[r];
761     PetscBool      found = PETSC_FALSE;
762     /* Trick for efficient searching for sorted rows */
763     if (lastidx > idx) p = 0;
764     lastidx = idx;
765     for (; p < size; ++p) {
766       if (idx >= owners[p] && idx < owners[p+1]) {
767         rrows[r].rank  = p;
768         rrows[r].index = rows[r] - owners[p];
769         found = PETSC_TRUE;
770         break;
771       }
772     }
773     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
774   }
775   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
776   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
777   /* Collect flags for rows to be zeroed */
778   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
779   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
780   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
781   /* Compress and put in row numbers */
782   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
783   /* fix right hand side if needed */
784   if (x && b) {
785     const PetscScalar *xx;
786     PetscScalar       *bb;
787 
788     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
789     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
790     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
791     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
793   }
794   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
795   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
796   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
823 {
824   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
825   PetscErrorCode    ierr;
826   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
827   PetscInt          i,j,r,m,p = 0,len = 0;
828   PetscInt          *lrows,*owners = A->rmap->range;
829   PetscSFNode       *rrows;
830   PetscSF           sf;
831   const PetscScalar *xx;
832   PetscScalar       *bb,*mask;
833   Vec               xmask,lmask;
834   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
835   const PetscInt    *aj, *ii,*ridx;
836   PetscScalar       *aa;
837 #if defined(PETSC_DEBUG)
838   PetscBool found = PETSC_FALSE;
839 #endif
840 
841   PetscFunctionBegin;
842   /* Create SF where leaves are input rows and roots are owned rows */
843   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
844   for (r = 0; r < n; ++r) lrows[r] = -1;
845   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
846   for (r = 0; r < N; ++r) {
847     const PetscInt idx   = rows[r];
848     PetscBool      found = PETSC_FALSE;
849     /* Trick for efficient searching for sorted rows */
850     if (lastidx > idx) p = 0;
851     lastidx = idx;
852     for (; p < size; ++p) {
853       if (idx >= owners[p] && idx < owners[p+1]) {
854         rrows[r].rank  = p;
855         rrows[r].index = rows[r] - owners[p];
856         found = PETSC_TRUE;
857         break;
858       }
859     }
860     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
861   }
862   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
863   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
864   /* Collect flags for rows to be zeroed */
865   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
866   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
867   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
868   /* Compress and put in row numbers */
869   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
870   /* zero diagonal part of matrix */
871   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
872   /* handle off diagonal part of matrix */
873   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
874   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
875   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
876   for (i=0; i<len; i++) bb[lrows[i]] = 1;
877   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
878   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
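  /* lmask now flags, for each ghost column of B, whether the corresponding global row/column is being zeroed */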
881   if (x) {
882     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
885     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
886   }
887   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
888   /* remove zeroed rows of off diagonal matrix */
889   ii = aij->i;
890   for (i=0; i<len; i++) {
891     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
892   }
893   /* loop over all elements of off process part of matrix zeroing removed columns*/
894   if (aij->compressedrow.use) {
895     m    = aij->compressedrow.nrows;
896     ii   = aij->compressedrow.i;
897     ridx = aij->compressedrow.rindex;
898     for (i=0; i<m; i++) {
899       n  = ii[i+1] - ii[i];
900       aj = aij->j + ii[i];
901       aa = aij->a + ii[i];
902 
903       for (j=0; j<n; j++) {
904         if (PetscAbsScalar(mask[*aj])) {
905           if (b) bb[*ridx] -= *aa*xx[*aj];
906           *aa = 0.0;
907         }
908         aa++;
909         aj++;
910       }
911       ridx++;
912     }
913   } else { /* do not use compressed row format */
914     m = l->B->rmap->n;
915     for (i=0; i<m; i++) {
916       n  = ii[i+1] - ii[i];
917       aj = aij->j + ii[i];
918       aa = aij->a + ii[i];
919       for (j=0; j<n; j++) {
920         if (PetscAbsScalar(mask[*aj])) {
921           if (b) bb[i] -= *aa*xx[*aj];
922           *aa = 0.0;
923         }
924         aa++;
925         aj++;
926       }
927     }
928   }
929   if (x) {
930     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
931     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
932   }
933   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
934   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
935   ierr = PetscFree(lrows);CHKERRQ(ierr);
936 
937   /* only change matrix nonzero state if pattern was allowed to be changed */
938   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
939     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
940     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_MPIAIJ"
947 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
948 {
949   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
950   PetscErrorCode ierr;
951   PetscInt       nt;
952 
953   PetscFunctionBegin;
954   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
955   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
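  /* overlap communication with computation: start scattering the needed ghost values of xx into lvec,
     multiply by the local diagonal block, complete the scatter, then add the off-diagonal
     contribution, i.e. yy = A_diag*xx followed by yy += B_off*lvec */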
956   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
957   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
958   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
965 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
966 {
967   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
968   PetscErrorCode ierr;
969 
970   PetscFunctionBegin;
971   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMultAdd_MPIAIJ"
977 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
985   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
992 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
995   PetscErrorCode ierr;
996   PetscBool      merged;
997 
998   PetscFunctionBegin;
999   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1000   /* do nondiagonal part */
1001   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1002   if (!merged) {
1003     /* send it on its way */
1004     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1005     /* do local part */
1006     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007     /* receive remote parts: note this assumes the values are not actually */
1008     /* added into yy until the VecScatterEnd() below */
1009     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   } else {
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* send it on its way */
1014     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1015     /* values were actually received in the Begin(), but we still need to call this no-op End() */
1016     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1023 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1024 {
1025   MPI_Comm       comm;
1026   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1027   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1028   IS             Me,Notme;
1029   PetscErrorCode ierr;
1030   PetscInt       M,N,first,last,*notme,i;
1031   PetscMPIInt    size;
1032 
1033   PetscFunctionBegin;
1034   /* Easy test: symmetric diagonal block */
1035   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1036   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1037   if (!*f) PetscFunctionReturn(0);
1038   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1040   if (size == 1) PetscFunctionReturn(0);
1041 
1042   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1043   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1044   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1045   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1046   for (i=0; i<first; i++) notme[i] = i;
1047   for (i=last; i<M; i++) notme[i-last+first] = i;
1048   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1049   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1050   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1051   Aoff = Aoffs[0];
1052   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1053   Boff = Boffs[0];
1054   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1055   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1056   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1057   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1058   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1059   ierr = PetscFree(notme);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1065 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1066 {
1067   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   /* do nondiagonal part */
1072   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1073   /* send it on its way */
1074   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075   /* do local part */
1076   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1077   /* receive remote parts */
1078   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*
1083   This only works correctly for square matrices where the subblock A->A is the
1084    diagonal block
1085 */
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1088 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1089 {
1090   PetscErrorCode ierr;
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092 
1093   PetscFunctionBegin;
1094   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1095   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1096   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatScale_MPIAIJ"
1102 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1109   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatDestroy_Redundant"
1115 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1116 {
1117   PetscErrorCode ierr;
1118   Mat_Redundant  *redund = *redundant;
1119   PetscInt       i;
1120 
1121   PetscFunctionBegin;
1122   *redundant = NULL;
1123   if (redund){
1124     if (redund->matseq) { /* via MatGetSubMatrices()  */
1125       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1126       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1127       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1128       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1129     } else {
1130       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1131       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1132       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1133       for (i=0; i<redund->nrecvs; i++) {
1134         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1135         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1136       }
1137       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1138     }
1139 
1140     if (redund->psubcomm) {
1141       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1142     }
1143     ierr = PetscFree(redund);CHKERRQ(ierr);
1144   }
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatDestroy_MPIAIJ"
1150 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1151 {
1152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156 #if defined(PETSC_USE_LOG)
1157   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1158 #endif
1159   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1160   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1161   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1162   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1164 #if defined(PETSC_USE_CTABLE)
1165   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1166 #else
1167   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1168 #endif
1169   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1170   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1171   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1172   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1173   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1174   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1175 
1176   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1190 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1191 {
1192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1193   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1194   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1195   PetscErrorCode ierr;
1196   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1197   int            fd;
1198   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1199   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1200   PetscScalar    *column_values;
1201   PetscInt       message_count,flowcontrolcount;
1202   FILE           *file;
1203 
1204   PetscFunctionBegin;
1205   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1206   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1207   nz   = A->nz + B->nz;
1208   if (!rank) {
1209     header[0] = MAT_FILE_CLASSID;
1210     header[1] = mat->rmap->N;
1211     header[2] = mat->cmap->N;
1212 
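    /* binary header layout: [MAT_FILE_CLASSID, global rows, global cols, global nonzero count (filled in by the reduction below)] */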
1213     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1214     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1215     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1216     /* get largest number of rows any processor has */
1217     rlen  = mat->rmap->n;
1218     range = mat->rmap->range;
1219     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1220   } else {
1221     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1222     rlen = mat->rmap->n;
1223   }
1224 
1225   /* load up the local row counts */
1226   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1227   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1228 
1229   /* store the row lengths to the file */
1230   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1231   if (!rank) {
1232     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     for (i=1; i<size; i++) {
1234       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1235       rlen = range[i+1] - range[i];
1236       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1238     }
1239     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1240   } else {
1241     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1242     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1244   }
1245   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1246 
1247   /* load up the local column indices */
1248   nzmax = nz; /* rank 0 needs buffer space for the largest nz on any process; other ranks need only their own */
1249   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1251   cnt   = 0;
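  /* emit each row's columns in ascending global order: off-diagonal entries to the left of the
     diagonal block, then the diagonal block (shifted by cstart), then the remaining off-diagonal
     entries (taken from garray) */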
1252   for (i=0; i<mat->rmap->n; i++) {
1253     for (j=B->i[i]; j<B->i[i+1]; j++) {
1254       if ((col = garray[B->j[j]]) > cstart) break;
1255       column_indices[cnt++] = col;
1256     }
1257     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1258     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1259   }
1260   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1261 
1262   /* store the column indices to the file */
1263   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1264   if (!rank) {
1265     MPI_Status status;
1266     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1267     for (i=1; i<size; i++) {
1268       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1269       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1270       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1271       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1273     }
1274     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1275   } else {
1276     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1277     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1280   }
1281   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1282 
1283   /* load up the local column values */
1284   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1285   cnt  = 0;
1286   for (i=0; i<mat->rmap->n; i++) {
1287     for (j=B->i[i]; j<B->i[i+1]; j++) {
1288       if (garray[B->j[j]] > cstart) break;
1289       column_values[cnt++] = B->a[j];
1290     }
1291     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1292     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1293   }
1294   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1295 
1296   /* store the column values to the file */
1297   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1298   if (!rank) {
1299     MPI_Status status;
1300     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1301     for (i=1; i<size; i++) {
1302       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1303       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1304       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1305       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1306       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1307     }
1308     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1309   } else {
1310     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1311     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1312     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1313     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1314   }
1315   ierr = PetscFree(column_values);CHKERRQ(ierr);
1316 
1317   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1318   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1319   PetscFunctionReturn(0);
1320 }
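
/*
   The binary layout written above is: a four entry PetscInt header (matrix
   class id, global row and column counts, and the global nonzero count
   reduced at the top of the routine), followed by all row lengths in rank
   order, then all global column indices, then all values.  A hedged sketch
   of reading such a file back (the file name is a placeholder, not part of
   this source):

     Mat         A;
     PetscViewer fd;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,fd);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/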
1321 
1322 #include <petscdraw.h>
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1325 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1326 {
1327   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1328   PetscErrorCode    ierr;
1329   PetscMPIInt       rank = aij->rank,size = aij->size;
1330   PetscBool         isdraw,iascii,isbinary;
1331   PetscViewer       sviewer;
1332   PetscViewerFormat format;
1333 
1334   PetscFunctionBegin;
1335   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1336   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1337   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1338   if (iascii) {
1339     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1340     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1341       MatInfo   info;
1342       PetscInt  *inodes;
1343 
1344       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1345       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1346       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1348       if (!inodes) {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       } else {
1352         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1353                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1354       }
1355       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1356       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1357       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1359       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1362       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1363       PetscFunctionReturn(0);
1364     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1365       PetscInt inodecount,inodelimit,*inodes;
1366       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1367       if (inodes) {
1368         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1369       } else {
1370         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1371       }
1372       PetscFunctionReturn(0);
1373     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1374       PetscFunctionReturn(0);
1375     }
1376   } else if (isbinary) {
1377     if (size == 1) {
1378       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1379       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1380     } else {
1381       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1382     }
1383     PetscFunctionReturn(0);
1384   } else if (isdraw) {
1385     PetscDraw draw;
1386     PetscBool isnull;
1387     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1388     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1389   }
1390 
1391   {
1392     /* assemble the entire matrix onto first processor. */
1393     Mat        A;
1394     Mat_SeqAIJ *Aloc;
1395     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1396     MatScalar  *a;
1397 
1398     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1399     if (!rank) {
1400       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1401     } else {
1402       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1403     }
1404     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1405     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1406     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1407     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1408     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1409 
1410     /* copy over the A part */
1411     Aloc = (Mat_SeqAIJ*)aij->A->data;
1412     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1413     row  = mat->rmap->rstart;
1414     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1415     for (i=0; i<m; i++) {
1416       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1417       row++;
1418       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1419     }
1420     aj = Aloc->j;
1421     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1422 
1423     /* copy over the B part */
1424     Aloc = (Mat_SeqAIJ*)aij->B->data;
1425     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1426     row  = mat->rmap->rstart;
1427     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1428     ct   = cols;
1429     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1430     for (i=0; i<m; i++) {
1431       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1432       row++;
1433       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1434     }
1435     ierr = PetscFree(ct);CHKERRQ(ierr);
1436     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1438     /*
1439        Every process must make this call, since the graphics waits are
1440        synchronized across all processors that share the PetscDraw object
1441     */
1442     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1443     if (!rank) {
1444       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1445     }
1446     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1447     ierr = MatDestroy(&A);CHKERRQ(ierr);
1448   }
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatView_MPIAIJ"
1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1455 {
1456   PetscErrorCode ierr;
1457   PetscBool      iascii,isdraw,issocket,isbinary;
1458 
1459   PetscFunctionBegin;
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1463   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1464   if (iascii || isdraw || isbinary || issocket) {
1465     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1466   }
1467   PetscFunctionReturn(0);
1468 }
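
/*
   Hedged usage sketch for the dispatch above; PETSC_VIEWER_STDOUT_WORLD and
   the format constant are standard PETSc, and the Mat `mat` is assumed to be
   assembled by the caller:

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   prints the per-process and VecScatter information produced by
   MatView_MPIAIJ_ASCIIorDraworSocket().
*/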
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatSOR_MPIAIJ"
1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1473 {
1474   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1475   PetscErrorCode ierr;
1476   Vec            bb1 = 0;
1477   PetscBool      hasop;
1478 
1479   PetscFunctionBegin;
1480   if (flag == SOR_APPLY_UPPER) {
1481     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1482     PetscFunctionReturn(0);
1483   }
1484 
1485   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1486     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1487   }
1488 
1489   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1490     if (flag & SOR_ZERO_INITIAL_GUESS) {
1491       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1492       its--;
1493     }
1494 
1495     while (its--) {
1496       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498 
1499       /* update rhs: bb1 = bb - B*x */
1500       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1501       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1502 
1503       /* local sweep */
1504       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1505     }
1506   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1507     if (flag & SOR_ZERO_INITIAL_GUESS) {
1508       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1509       its--;
1510     }
1511     while (its--) {
1512       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514 
1515       /* update rhs: bb1 = bb - B*x */
1516       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1517       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1518 
1519       /* local sweep */
1520       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1521     }
1522   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1523     if (flag & SOR_ZERO_INITIAL_GUESS) {
1524       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1525       its--;
1526     }
1527     while (its--) {
1528       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530 
1531       /* update rhs: bb1 = bb - B*x */
1532       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1533       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1534 
1535       /* local sweep */
1536       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1537     }
1538   } else if (flag & SOR_EISENSTAT) {
1539     Vec xx1;
1540 
1541     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1542     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1543 
1544     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546     if (!mat->diag) {
1547       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1548       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1549     }
1550     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1551     if (hasop) {
1552       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1553     } else {
1554       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1555     }
1556     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1557 
1558     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1559 
1560     /* local sweep */
1561     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1562     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1563     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1564   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1565 
1566   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
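
/*
   Hedged usage sketch: two local symmetric SOR sweeps with a zero initial
   guess, which is the operation a PCSOR smoother requests; b, x and the
   relaxation factor are assumed to be set up by the caller:

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   True global (parallel) sweeps are rejected above with PETSC_ERR_SUP.
*/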
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatPermute_MPIAIJ"
1572 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1573 {
1574   Mat            aA,aB,Aperm;
1575   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1576   PetscScalar    *aa,*ba;
1577   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1578   PetscSF        rowsf,sf;
1579   IS             parcolp = NULL;
1580   PetscBool      done;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1585   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1586   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1587   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1588 
1589   /* Invert row permutation to find out where my rows should go */
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1593   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1594   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1596 
1597   /* Invert column permutation to find out where my columns should go */
1598   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1599   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1600   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1601   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1602   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1605 
1606   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1607   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1608   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1609 
1610   /* Find out where my gcols should go */
1611   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1612   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1613   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1614   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1615   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1616   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1617   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1619 
1620   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1621   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1622   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1623   for (i=0; i<m; i++) {
1624     PetscInt row = rdest[i],rowner;
1625     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1626     for (j=ai[i]; j<ai[i+1]; j++) {
1627       PetscInt cowner,col = cdest[aj[j]];
1628       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1629       if (rowner == cowner) dnnz[i]++;
1630       else onnz[i]++;
1631     }
1632     for (j=bi[i]; j<bi[i+1]; j++) {
1633       PetscInt cowner,col = gcdest[bj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638   }
1639   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1641   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1642   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1643   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1644 
1645   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1646   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1647   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) {
1649     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1650     PetscInt j0,rowlen;
1651     rowlen = ai[i+1] - ai[i];
1652     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed arrays, so insert in batches of at most m */
1653       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1654       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1655     }
1656     rowlen = bi[i+1] - bi[i];
1657     for (j0=j=0; j<rowlen; j0=j) {
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661   }
1662   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1665   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1666   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1667   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1668   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1669   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1670   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1671   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1672   *B = Aperm;
1673   PetscFunctionReturn(0);
1674 }
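
/*
   Hedged usage sketch: rowp and colp supply, on each process, its portion of
   the row and column permutations (their construction is application
   specific; see the MatPermute() man page for the exact convention):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/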
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1678 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1679 {
1680   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1681   Mat            A    = mat->A,B = mat->B;
1682   PetscErrorCode ierr;
1683   PetscReal      isend[5],irecv[5];
1684 
1685   PetscFunctionBegin;
1686   info->block_size = 1.0;
1687   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1688 
1689   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1690   isend[3] = info->memory;  isend[4] = info->mallocs;
1691 
1692   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1693 
1694   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1695   isend[3] += info->memory;  isend[4] += info->mallocs;
1696   if (flag == MAT_LOCAL) {
1697     info->nz_used      = isend[0];
1698     info->nz_allocated = isend[1];
1699     info->nz_unneeded  = isend[2];
1700     info->memory       = isend[3];
1701     info->mallocs      = isend[4];
1702   } else if (flag == MAT_GLOBAL_MAX) {
1703     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1704 
1705     info->nz_used      = irecv[0];
1706     info->nz_allocated = irecv[1];
1707     info->nz_unneeded  = irecv[2];
1708     info->memory       = irecv[3];
1709     info->mallocs      = irecv[4];
1710   } else if (flag == MAT_GLOBAL_SUM) {
1711     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1712 
1713     info->nz_used      = irecv[0];
1714     info->nz_allocated = irecv[1];
1715     info->nz_unneeded  = irecv[2];
1716     info->memory       = irecv[3];
1717     info->mallocs      = irecv[4];
1718   }
1719   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1720   info->fill_ratio_needed = 0;
1721   info->factor_mallocs    = 0;
1722   PetscFunctionReturn(0);
1723 }
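
/*
   Hedged usage sketch: querying the globally summed statistics assembled
   above; the MatInfo fields are PetscLogDouble, hence the %g format:

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/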
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatSetOption_MPIAIJ"
1727 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1728 {
1729   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   switch (op) {
1734   case MAT_NEW_NONZERO_LOCATIONS:
1735   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1736   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1737   case MAT_KEEP_NONZERO_PATTERN:
1738   case MAT_NEW_NONZERO_LOCATION_ERR:
1739   case MAT_USE_INODES:
1740   case MAT_IGNORE_ZERO_ENTRIES:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_ROW_ORIENTED:
1746     a->roworiented = flg;
1747 
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_NEW_DIAGONALS:
1752     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1753     break;
1754   case MAT_IGNORE_OFF_PROC_ENTRIES:
1755     a->donotstash = flg;
1756     break;
1757   case MAT_SPD:
1758     A->spd_set = PETSC_TRUE;
1759     A->spd     = flg;
1760     if (flg) {
1761       A->symmetric                  = PETSC_TRUE;
1762       A->structurally_symmetric     = PETSC_TRUE;
1763       A->symmetric_set              = PETSC_TRUE;
1764       A->structurally_symmetric_set = PETSC_TRUE;
1765     }
1766     break;
1767   case MAT_SYMMETRIC:
1768     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_STRUCTURALLY_SYMMETRIC:
1771     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_HERMITIAN:
1774     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1775     break;
1776   case MAT_SYMMETRY_ETERNAL:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   default:
1780     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1781   }
1782   PetscFunctionReturn(0);
1783 }
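
/*
   Hedged usage sketch: options handled by the switch above are usually set
   between preallocation and the first MatSetValues(); for example

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);

   drops any entries destined for other processes and errors on mallocs
   outside the preallocated pattern.
*/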
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatGetRow_MPIAIJ"
1787 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1788 {
1789   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1790   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1791   PetscErrorCode ierr;
1792   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1793   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1794   PetscInt       *cmap,*idx_p;
1795 
1796   PetscFunctionBegin;
1797   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1798   mat->getrowactive = PETSC_TRUE;
1799 
1800   if (!mat->rowvalues && (idx || v)) {
1801     /*
1802         allocate enough space to hold information from the longest row.
1803     */
1804     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1805     PetscInt   max = 1,tmp;
1806     for (i=0; i<matin->rmap->n; i++) {
1807       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1808       if (max < tmp) max = tmp;
1809     }
1810     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1811   }
1812 
1813   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1814   lrow = row - rstart;
1815 
1816   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1817   if (!v)   {pvA = 0; pvB = 0;}
1818   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1819   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1820   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1821   nztot = nzA + nzB;
1822 
1823   cmap = mat->garray;
1824   if (v  || idx) {
1825     if (nztot) {
1826       /* Sort by increasing column numbers, assuming A and B already sorted */
1827       PetscInt imark = -1;
1828       if (v) {
1829         *v = v_p = mat->rowvalues;
1830         for (i=0; i<nzB; i++) {
1831           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1832           else break;
1833         }
1834         imark = i;
1835         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1836         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1837       }
1838       if (idx) {
1839         *idx = idx_p = mat->rowindices;
1840         if (imark > -1) {
1841           for (i=0; i<imark; i++) {
1842             idx_p[i] = cmap[cworkB[i]];
1843           }
1844         } else {
1845           for (i=0; i<nzB; i++) {
1846             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1847             else break;
1848           }
1849           imark = i;
1850         }
1851         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1852         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1853       }
1854     } else {
1855       if (idx) *idx = 0;
1856       if (v)   *v   = 0;
1857     }
1858   }
1859   *nz  = nztot;
1860   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1861   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
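
/*
   Hedged usage sketch: the public MatGetRow()/MatRestoreRow() pair that ends
   up in the routine above; only locally owned rows may be requested and every
   MatGetRow() must be matched by a MatRestoreRow() before the next one:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/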
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1867 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1868 {
1869   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1870 
1871   PetscFunctionBegin;
1872   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1873   aij->getrowactive = PETSC_FALSE;
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatNorm_MPIAIJ"
1879 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1880 {
1881   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1882   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1883   PetscErrorCode ierr;
1884   PetscInt       i,j,cstart = mat->cmap->rstart;
1885   PetscReal      sum = 0.0;
1886   MatScalar      *v;
1887 
1888   PetscFunctionBegin;
1889   if (aij->size == 1) {
1890     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1891   } else {
1892     if (type == NORM_FROBENIUS) {
1893       v = amat->a;
1894       for (i=0; i<amat->nz; i++) {
1895         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1896       }
1897       v = bmat->a;
1898       for (i=0; i<bmat->nz; i++) {
1899         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1900       }
1901       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1902       *norm = PetscSqrtReal(*norm);
1903     } else if (type == NORM_1) { /* max column norm */
1904       PetscReal *tmp,*tmp2;
1905       PetscInt  *jj,*garray = aij->garray;
1906       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1907       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1908       *norm = 0.0;
1909       v     = amat->a; jj = amat->j;
1910       for (j=0; j<amat->nz; j++) {
1911         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1912       }
1913       v = bmat->a; jj = bmat->j;
1914       for (j=0; j<bmat->nz; j++) {
1915         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1916       }
1917       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1918       for (j=0; j<mat->cmap->N; j++) {
1919         if (tmp2[j] > *norm) *norm = tmp2[j];
1920       }
1921       ierr = PetscFree(tmp);CHKERRQ(ierr);
1922       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1923     } else if (type == NORM_INFINITY) { /* max row norm */
1924       PetscReal ntemp = 0.0;
1925       for (j=0; j<aij->A->rmap->n; j++) {
1926         v   = amat->a + amat->i[j];
1927         sum = 0.0;
1928         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1929           sum += PetscAbsScalar(*v); v++;
1930         }
1931         v = bmat->a + bmat->i[j];
1932         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1933           sum += PetscAbsScalar(*v); v++;
1934         }
1935         if (sum > ntemp) ntemp = sum;
1936       }
1937       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1938     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1939   }
1940   PetscFunctionReturn(0);
1941 }
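
/*
   Hedged usage sketch (NORM_2 is not supported above):

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);         largest column sum
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);  largest row sum
*/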
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatTranspose_MPIAIJ"
1945 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1946 {
1947   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1948   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1949   PetscErrorCode ierr;
1950   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1951   PetscInt       cstart = A->cmap->rstart,ncol;
1952   Mat            B;
1953   MatScalar      *array;
1954 
1955   PetscFunctionBegin;
1956   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1957 
1958   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1959   ai = Aloc->i; aj = Aloc->j;
1960   bi = Bloc->i; bj = Bloc->j;
1961   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1962     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1963     PetscSFNode          *oloc;
1964     PETSC_UNUSED PetscSF sf;
1965 
1966     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1967     /* compute d_nnz for preallocation */
1968     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1969     for (i=0; i<ai[ma]; i++) {
1970       d_nnz[aj[i]]++;
1971       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1972     }
1973     /* compute local off-diagonal contributions */
1974     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1975     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1976     /* map those to global */
1977     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1978     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1979     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1980     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1981     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1982     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1983     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1984 
1985     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1986     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1987     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1988     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1989     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1990     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1991   } else {
1992     B    = *matout;
1993     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1994     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1995   }
1996 
1997   /* copy over the A part */
1998   array = Aloc->a;
1999   row   = A->rmap->rstart;
2000   for (i=0; i<ma; i++) {
2001     ncol = ai[i+1]-ai[i];
2002     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2003     row++;
2004     array += ncol; aj += ncol;
2005   }
2006   aj = Aloc->j;
2007   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2008 
2009   /* copy over the B part */
2010   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2011   array = Bloc->a;
2012   row   = A->rmap->rstart;
2013   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2014   cols_tmp = cols;
2015   for (i=0; i<mb; i++) {
2016     ncol = bi[i+1]-bi[i];
2017     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2018     row++;
2019     array += ncol; cols_tmp += ncol;
2020   }
2021   ierr = PetscFree(cols);CHKERRQ(ierr);
2022 
2023   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2026     *matout = B;
2027   } else {
2028     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2029   }
2030   PetscFunctionReturn(0);
2031 }
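
/*
   Hedged usage sketch covering both branches above:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates a new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in place, square matrices only
*/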
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2035 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2036 {
2037   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2038   Mat            a    = aij->A,b = aij->B;
2039   PetscErrorCode ierr;
2040   PetscInt       s1,s2,s3;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2044   if (rr) {
2045     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2046     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2047     /* Overlap communication with computation. */
2048     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2049   }
2050   if (ll) {
2051     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2052     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2053     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2054   }
2055   /* scale the diagonal block */
2056   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2057 
2058   if (rr) {
2059     /* Do a scatter end and then right scale the off-diagonal block */
2060     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2061     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2062   }
2063   PetscFunctionReturn(0);
2064 }
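
/*
   Hedged usage sketch: mat = diag(ll) * mat * diag(rr), with NULL skipping a
   side; ll must conform to the local row layout and rr to the local column
   layout, as checked above:

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,NULL,rr);CHKERRQ(ierr);   right scaling only
*/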
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2068 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2069 {
2070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2071   PetscErrorCode ierr;
2072 
2073   PetscFunctionBegin;
2074   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatEqual_MPIAIJ"
2080 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2081 {
2082   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2083   Mat            a,b,c,d;
2084   PetscBool      flg;
2085   PetscErrorCode ierr;
2086 
2087   PetscFunctionBegin;
2088   a = matA->A; b = matA->B;
2089   c = matB->A; d = matB->B;
2090 
2091   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2092   if (flg) {
2093     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2094   }
2095   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatCopy_MPIAIJ"
2101 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2102 {
2103   PetscErrorCode ierr;
2104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2105   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2106 
2107   PetscFunctionBegin;
2108   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2109   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2110     /* because of the column compression in the off-processor part of the matrix a->B,
2111        the number of columns in a->B and b->B may be different, hence we cannot call
2112        MatCopy() directly on the two parts. If need be, we could provide a copy more
2113        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2114        then copying the submatrices */
2115     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2116   } else {
2117     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2118     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
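
/*
   Hedged usage sketch: B must already exist with matching sizes; passing
   SAME_NONZERO_PATTERN takes the fast per-block path above:

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   When no compatible B exists yet, MatDuplicate(A,MAT_COPY_VALUES,&B) is
   usually simpler.
*/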
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatSetUp_MPIAIJ"
2125 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2126 {
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 /*
2135    Computes the number of nonzeros per row needed for preallocation when X and Y
2136    have different nonzero structure.
2137 */
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2140 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2141 {
2142   PetscInt       i,j,k,nzx,nzy;
2143 
2144   PetscFunctionBegin;
2145   /* Set the number of nonzeros in the new matrix */
2146   for (i=0; i<m; i++) {
2147     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2148     nzx = xi[i+1] - xi[i];
2149     nzy = yi[i+1] - yi[i];
2150     nnz[i] = 0;
2151     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2152       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2153       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2154       nnz[i]++;
2155     }
2156     for (; k<nzy; k++) nnz[i]++;
2157   }
2158   PetscFunctionReturn(0);
2159 }
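
/*
   Worked example of the merge count above: if a row of X has global columns
   {1,4,7} and the same row of Y has {0,4,9}, the union is {0,1,4,7,9} and
   nnz for that row is 5; column 4 is counted only once because the duplicate
   is skipped.
*/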
2160 
2161 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2164 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2165 {
2166   PetscErrorCode ierr;
2167   PetscInt       m = Y->rmap->N;
2168   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2169   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatAXPY_MPIAIJ"
2178 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2179 {
2180   PetscErrorCode ierr;
2181   PetscInt       i;
2182   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2183   PetscBLASInt   bnz,one=1;
2184   Mat_SeqAIJ     *x,*y;
2185 
2186   PetscFunctionBegin;
2187   if (str == SAME_NONZERO_PATTERN) {
2188     PetscScalar alpha = a;
2189     x    = (Mat_SeqAIJ*)xx->A->data;
2190     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2191     y    = (Mat_SeqAIJ*)yy->A->data;
2192     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2193     x    = (Mat_SeqAIJ*)xx->B->data;
2194     y    = (Mat_SeqAIJ*)yy->B->data;
2195     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2196     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2197     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2198   } else if (str == SUBSET_NONZERO_PATTERN) {
2199     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2200 
2201     x = (Mat_SeqAIJ*)xx->B->data;
2202     y = (Mat_SeqAIJ*)yy->B->data;
2203     if (y->xtoy && y->XtoY != xx->B) {
2204       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2205       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2206     }
2207     if (!y->xtoy) { /* get xtoy */
2208       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2209       y->XtoY = xx->B;
2210       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2211     }
2212     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2213     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2214   } else {
2215     Mat      B;
2216     PetscInt *nnz_d,*nnz_o;
2217     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2218     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2219     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2220     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2221     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2222     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2223     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2224     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2225     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2226     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2227     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2228     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2229     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2230     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2231   }
2232   PetscFunctionReturn(0);
2233 }
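
/*
   Hedged usage sketch: Y = Y + a*X, with str selecting the branch above:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       BLAS axpy on the stored values
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  rebuilds Y with the merged pattern
*/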
2234 
2235 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatConjugate_MPIAIJ"
2239 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2240 {
2241 #if defined(PETSC_USE_COMPLEX)
2242   PetscErrorCode ierr;
2243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2244 
2245   PetscFunctionBegin;
2246   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2247   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2248 #else
2249   PetscFunctionBegin;
2250 #endif
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatRealPart_MPIAIJ"
2256 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2257 {
2258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2259   PetscErrorCode ierr;
2260 
2261   PetscFunctionBegin;
2262   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2263   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2269 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2270 {
2271   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2272   PetscErrorCode ierr;
2273 
2274   PetscFunctionBegin;
2275   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2276   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 #if defined(PETSC_HAVE_PBGL)
2281 
2282 #include <boost/parallel/mpi/bsp_process_group.hpp>
2283 #include <boost/graph/distributed/ilu_default_graph.hpp>
2284 #include <boost/graph/distributed/ilu_0_block.hpp>
2285 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2286 #include <boost/graph/distributed/petsc/interface.hpp>
2287 #include <boost/multi_array.hpp>
2288 #include <boost/parallel/distributed_property_map.hpp>
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2292 /*
2293   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2294 */
2295 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2296 {
2297   namespace petsc = boost::distributed::petsc;
2298 
2299   namespace graph_dist = boost::graph::distributed;
2300   using boost::graph::distributed::ilu_default::process_group_type;
2301   using boost::graph::ilu_permuted;
2302 
2303   PetscBool      row_identity, col_identity;
2304   PetscContainer c;
2305   PetscInt       m, n, M, N;
2306   PetscErrorCode ierr;
2307 
2308   PetscFunctionBegin;
2309   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2310   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2311   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2312   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2313 
2314   process_group_type pg;
2315   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2316   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2317   lgraph_type& level_graph = *lgraph_p;
2318   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2319 
2320   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2321   ilu_permuted(level_graph);
2322 
2323   /* put together the new matrix */
2324   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2325   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2326   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2327   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2328   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2329   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2330   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2332 
2333   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2334   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2335   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2336   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2342 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2343 {
2344   PetscFunctionBegin;
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatSolve_MPIAIJ"
2350 /*
2351   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2352 */
2353 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2354 {
2355   namespace graph_dist = boost::graph::distributed;
2356 
2357   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2358   lgraph_type    *lgraph_p;
2359   PetscContainer c;
2360   PetscErrorCode ierr;
2361 
2362   PetscFunctionBegin;
2363   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2364   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2365   ierr = VecCopy(b, x);CHKERRQ(ierr);
2366 
2367   PetscScalar *array_x;
2368   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2369   PetscInt sx;
2370   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2371 
2372   PetscScalar *array_b;
2373   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2374   PetscInt sb;
2375   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2376 
2377   lgraph_type& level_graph = *lgraph_p;
2378   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2379 
2380   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2381   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2382   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2383 
2384   typedef boost::iterator_property_map<array_ref_type::iterator,
2385                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2386   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2387   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2388 
2389   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2390   PetscFunctionReturn(0);
2391 }
2392 #endif
2393 
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2397 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2398 {
2399   PetscMPIInt    rank,size;
2400   MPI_Comm       comm;
2401   PetscErrorCode ierr;
2402   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2403   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2404   PetscInt       *rowrange = mat->rmap->range;
2405   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2406   Mat            A = aij->A,B=aij->B,C=*matredundant;
2407   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2408   PetscScalar    *sbuf_a;
2409   PetscInt       nzlocal=a->nz+b->nz;
2410   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2411   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2412   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2413   MatScalar      *aworkA,*aworkB;
2414   PetscScalar    *vals;
2415   PetscMPIInt    tag1,tag2,tag3,imdex;
2416   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2417   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2418   MPI_Status     recv_status,*send_status;
2419   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2420   PetscInt       **rbuf_j=NULL;
2421   PetscScalar    **rbuf_a=NULL;
2422   Mat_Redundant  *redund =NULL;
2423 
2424   PetscFunctionBegin;
2425   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2426   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2427   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2428   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2429   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2430 
2431   if (reuse == MAT_REUSE_MATRIX) {
2432     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2433     if (subsize == 1) {
2434       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2435       redund = c->redundant;
2436     } else {
2437       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2438       redund = c->redundant;
2439     }
2440     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2441 
2442     nsends    = redund->nsends;
2443     nrecvs    = redund->nrecvs;
2444     send_rank = redund->send_rank;
2445     recv_rank = redund->recv_rank;
2446     sbuf_nz   = redund->sbuf_nz;
2447     rbuf_nz   = redund->rbuf_nz;
2448     sbuf_j    = redund->sbuf_j;
2449     sbuf_a    = redund->sbuf_a;
2450     rbuf_j    = redund->rbuf_j;
2451     rbuf_a    = redund->rbuf_a;
2452   }
2453 
2454   if (reuse == MAT_INITIAL_MATRIX) {
2455     PetscInt    nleftover,np_subcomm;
2456 
2457     /* get the destination processors' id send_rank, nsends and nrecvs */
2458     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2459 
2460     np_subcomm = size/nsubcomm;
2461     nleftover  = size - nsubcomm*np_subcomm;
2462 
2463     /* the block of code below is specific to INTERLACED */
2464     /* ------------------------------------------------*/
2465     nsends = 0; nrecvs = 0;
2466     for (i=0; i<size; i++) {
2467       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2468         send_rank[nsends++] = i;
2469         recv_rank[nrecvs++] = i;
2470       }
2471     }
2472     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2473       i = size-nleftover-1;
2474       j = 0;
2475       while (j < nsubcomm - nleftover) {
2476         send_rank[nsends++] = i;
2477         i--; j++;
2478       }
2479     }
2480 
2481     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2482       for (i=0; i<nleftover; i++) {
2483         recv_rank[nrecvs++] = size-nleftover+i;
2484       }
2485     }
2486     /*----------------------------------------------*/
2487 
2488     /* allocate sbuf_j, sbuf_a */
2489     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2490     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2491     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2492     /*
2493     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2494     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2495      */
2496   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2497 
2498   /* copy mat's local entries into the buffers */
2499   if (reuse == MAT_INITIAL_MATRIX) {
2500     rownz_max = 0;
2501     rptr      = sbuf_j;
2502     cols      = sbuf_j + rend-rstart + 1;
2503     vals      = sbuf_a;
2504     rptr[0]   = 0;
2505     for (i=0; i<rend-rstart; i++) {
2506       row    = i + rstart;
2507       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2508       ncols  = nzA + nzB;
2509       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2510       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2511       /* load the column indices for this row into cols */
2512       lwrite = 0;
2513       for (l=0; l<nzB; l++) {
2514         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2515           vals[lwrite]   = aworkB[l];
2516           cols[lwrite++] = ctmp;
2517         }
2518       }
2519       for (l=0; l<nzA; l++) {
2520         vals[lwrite]   = aworkA[l];
2521         cols[lwrite++] = cstart + cworkA[l];
2522       }
2523       for (l=0; l<nzB; l++) {
2524         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2525           vals[lwrite]   = aworkB[l];
2526           cols[lwrite++] = ctmp;
2527         }
2528       }
2529       vals     += ncols;
2530       cols     += ncols;
2531       rptr[i+1] = rptr[i] + ncols;
2532       if (rownz_max < ncols) rownz_max = ncols;
2533     }
2534     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2535   } else { /* only copy matrix values into sbuf_a */
2536     rptr    = sbuf_j;
2537     vals    = sbuf_a;
2538     rptr[0] = 0;
2539     for (i=0; i<rend-rstart; i++) {
2540       row    = i + rstart;
2541       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2542       ncols  = nzA + nzB;
2543       cworkB = b->j + b->i[i];
2544       aworkA = a->a + a->i[i];
2545       aworkB = b->a + b->i[i];
2546       lwrite = 0;
2547       for (l=0; l<nzB; l++) {
2548         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2549       }
2550       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2551       for (l=0; l<nzB; l++) {
2552         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2553       }
2554       vals     += ncols;
2555       rptr[i+1] = rptr[i] + ncols;
2556     }
2557   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2558 
2559   /* send nzlocal to others, and recv other's nzlocal */
2560   /*--------------------------------------------------*/
2561   if (reuse == MAT_INITIAL_MATRIX) {
2562     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2563 
2564     s_waits2 = s_waits3 + nsends;
2565     s_waits1 = s_waits2 + nsends;
2566     r_waits1 = s_waits1 + nsends;
2567     r_waits2 = r_waits1 + nrecvs;
2568     r_waits3 = r_waits2 + nrecvs;
2569   } else {
2570     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2571 
2572     r_waits3 = s_waits3 + nsends;
2573   }
2574 
2575   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2576   if (reuse == MAT_INITIAL_MATRIX) {
2577     /* get new tags to keep the communication clean */
2578     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2579     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2580     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2581 
2582     /* post receives of other's nzlocal */
2583     for (i=0; i<nrecvs; i++) {
2584       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2585     }
2586     /* send nzlocal to others */
2587     for (i=0; i<nsends; i++) {
2588       sbuf_nz[i] = nzlocal;
2589       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2590     }
2591     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2592     count = nrecvs;
2593     while (count) {
2594       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2595 
2596       recv_rank[imdex] = recv_status.MPI_SOURCE;
2597       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2598       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2599 
2600       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2601 
2602       rbuf_nz[imdex] += i + 2;
2603 
2604       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2605       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2606       count--;
2607     }
2608     /* wait on sends of nzlocal */
2609     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2610     /* send mat->i,j to others, and recv from others */
2611     /*------------------------------------------------*/
2612     for (i=0; i<nsends; i++) {
2613       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2614       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2615     }
2616     /* wait on receives of mat->i,j */
2617     /*------------------------------*/
2618     count = nrecvs;
2619     while (count) {
2620       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2621       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2622       count--;
2623     }
2624     /* wait on sends of mat->i,j */
2625     /*---------------------------*/
2626     if (nsends) {
2627       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2628     }
2629   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2630 
2631   /* post receives, send and receive mat->a */
2632   /*----------------------------------------*/
2633   for (imdex=0; imdex<nrecvs; imdex++) {
2634     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2635   }
2636   for (i=0; i<nsends; i++) {
2637     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2638   }
2639   count = nrecvs;
2640   while (count) {
2641     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2642     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2643     count--;
2644   }
2645   if (nsends) {
2646     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2647   }
2648 
2649   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2650 
2651   /* create redundant matrix */
2652   /*-------------------------*/
2653   if (reuse == MAT_INITIAL_MATRIX) {
2654     const PetscInt *range;
2655     PetscInt       rstart_sub,rend_sub,mloc_sub;
2656 
2657     /* compute rownz_max for preallocation */
2658     for (imdex=0; imdex<nrecvs; imdex++) {
2659       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2660       rptr = rbuf_j[imdex];
2661       for (i=0; i<j; i++) {
2662         ncols = rptr[i+1] - rptr[i];
2663         if (rownz_max < ncols) rownz_max = ncols;
2664       }
2665     }
2666 
2667     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2668 
2669     /* get local size of redundant matrix
2670        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2671     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2672     rstart_sub = range[nsubcomm*subrank];
2673     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2674       rend_sub = range[nsubcomm*(subrank+1)];
2675     } else {
2676       rend_sub = mat->rmap->N;
2677     }
2678     mloc_sub = rend_sub - rstart_sub;
2679 
2680     if (M == N) {
2681       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2682     } else { /* non-square matrix */
2683       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2684     }
2685     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2686     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2687     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2688     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2689   } else {
2690     C = *matredundant;
2691   }
2692 
2693   /* insert local matrix entries */
2694   rptr = sbuf_j;
2695   cols = sbuf_j + rend-rstart + 1;
2696   vals = sbuf_a;
2697   for (i=0; i<rend-rstart; i++) {
2698     row   = i + rstart;
2699     ncols = rptr[i+1] - rptr[i];
2700     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2701     vals += ncols;
2702     cols += ncols;
2703   }
2704   /* insert received matrix entries */
2705   for (imdex=0; imdex<nrecvs; imdex++) {
2706     rstart = rowrange[recv_rank[imdex]];
2707     rend   = rowrange[recv_rank[imdex]+1];
2708     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2709     rptr   = rbuf_j[imdex];
2710     cols   = rbuf_j[imdex] + rend-rstart + 1;
2711     vals   = rbuf_a[imdex];
2712     for (i=0; i<rend-rstart; i++) {
2713       row   = i + rstart;
2714       ncols = rptr[i+1] - rptr[i];
2715       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2716       vals += ncols;
2717       cols += ncols;
2718     }
2719   }
2720   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2721   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2722 
2723   if (reuse == MAT_INITIAL_MATRIX) {
2724     *matredundant = C;
2725 
2726     /* create a supporting struct and attach it to C for reuse */
2727     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2728     if (subsize == 1) {
2729       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2730       c->redundant = redund;
2731     } else {
2732       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2733       c->redundant = redund;
2734     }
2735 
2736     redund->nzlocal   = nzlocal;
2737     redund->nsends    = nsends;
2738     redund->nrecvs    = nrecvs;
2739     redund->send_rank = send_rank;
2740     redund->recv_rank = recv_rank;
2741     redund->sbuf_nz   = sbuf_nz;
2742     redund->rbuf_nz   = rbuf_nz;
2743     redund->sbuf_j    = sbuf_j;
2744     redund->sbuf_a    = sbuf_a;
2745     redund->rbuf_j    = rbuf_j;
2746     redund->rbuf_a    = rbuf_a;
2747     redund->psubcomm  = NULL;
2748   }
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2754 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2755 {
2756   PetscErrorCode ierr;
2757   MPI_Comm       comm;
2758   PetscMPIInt    size,subsize;
2759   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2760   Mat_Redundant  *redund=NULL;
2761   PetscSubcomm   psubcomm=NULL;
2762   MPI_Comm       subcomm_in=subcomm;
2763   Mat            *matseq;
2764   IS             isrow,iscol;
2765 
2766   PetscFunctionBegin;
2767   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2768     if (reuse ==  MAT_INITIAL_MATRIX) {
2769       /* create psubcomm, then get subcomm */
2770       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2771       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2772       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2773 
2774       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2775       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2776       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2777       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2778       subcomm = psubcomm->comm;
2779     } else { /* retrieve psubcomm and subcomm */
2780       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2781       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2782       if (subsize == 1) {
2783         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2784         redund = c->redundant;
2785       } else {
2786         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2787         redund = c->redundant;
2788       }
2789       psubcomm = redund->psubcomm;
2790     }
2791     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2792       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2793       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2794         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2795         if (subsize == 1) {
2796           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2797           c->redundant->psubcomm = psubcomm;
2798         } else {
2799           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2800           c->redundant->psubcomm = psubcomm;
2801         }
2802       }
2803       PetscFunctionReturn(0);
2804     }
2805   }
2806 
2807   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2808   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2809   if (reuse == MAT_INITIAL_MATRIX) {
2810     /* create a local sequential matrix matseq[0] */
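         /* split the M global rows evenly over subcomm; the prefix sum (MPI_Scan) of
            the local sizes gives this process's ownership range [rstart,rend) */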
2811     mloc_sub = PETSC_DECIDE;
2812     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2813     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2814     rstart = rend - mloc_sub;
2815     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2816     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2817   } else { /* reuse == MAT_REUSE_MATRIX */
2818     if (subsize == 1) {
2819       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2820       redund = c->redundant;
2821     } else {
2822       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2823       redund = c->redundant;
2824     }
2825 
2826     isrow  = redund->isrow;
2827     iscol  = redund->iscol;
2828     matseq = redund->matseq;
2829   }
2830   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2831   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2832 
2833   if (reuse == MAT_INITIAL_MATRIX) {
2834     /* create a supporting struct and attach it to *matredundant for reuse */
2835     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2836     if (subsize == 1) {
2837       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2838       c->redundant = redund;
2839     } else {
2840       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2841       c->redundant = redund;
2842     }
2843     redund->isrow    = isrow;
2844     redund->iscol    = iscol;
2845     redund->matseq   = matseq;
2846     redund->psubcomm = psubcomm;
2847   }
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2853 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2854 {
2855   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2856   PetscErrorCode ierr;
2857   PetscInt       i,*idxb = 0;
2858   PetscScalar    *va,*vb;
2859   Vec            vtmp;
2860 
2861   PetscFunctionBegin;
2862   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2863   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2864   if (idx) {
2865     for (i=0; i<A->rmap->n; i++) {
2866       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2867     }
2868   }
2869 
2870   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2871   if (idx) {
2872     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2873   }
2874   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2875   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2876 
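       /* keep, row by row, the larger magnitude of the diagonal-block and
          off-diagonal-block maxima; local off-diagonal column indices are mapped
          back to global indices through a->garray */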
2877   for (i=0; i<A->rmap->n; i++) {
2878     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2879       va[i] = vb[i];
2880       if (idx) idx[i] = a->garray[idxb[i]];
2881     }
2882   }
2883 
2884   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2885   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2886   ierr = PetscFree(idxb);CHKERRQ(ierr);
2887   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2893 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2894 {
2895   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2896   PetscErrorCode ierr;
2897   PetscInt       i,*idxb = 0;
2898   PetscScalar    *va,*vb;
2899   Vec            vtmp;
2900 
2901   PetscFunctionBegin;
2902   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2903   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2904   if (idx) {
2905     for (i=0; i<A->rmap->n; i++) {
2906       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2907     }
2908   }
2909 
2910   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2911   if (idx) {
2912     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2913   }
2914   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2915   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2916 
2917   for (i=0; i<A->rmap->n; i++) {
2918     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2919       va[i] = vb[i];
2920       if (idx) idx[i] = a->garray[idxb[i]];
2921     }
2922   }
2923 
2924   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2925   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2926   ierr = PetscFree(idxb);CHKERRQ(ierr);
2927   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 #undef __FUNCT__
2932 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2933 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2934 {
2935   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2936   PetscInt       n      = A->rmap->n;
2937   PetscInt       cstart = A->cmap->rstart;
2938   PetscInt       *cmap  = mat->garray;
2939   PetscInt       *diagIdx, *offdiagIdx;
2940   Vec            diagV, offdiagV;
2941   PetscScalar    *a, *diagA, *offdiagA;
2942   PetscInt       r;
2943   PetscErrorCode ierr;
2944 
2945   PetscFunctionBegin;
2946   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2947   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2948   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2949   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2950   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2951   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2952   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2953   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2954   for (r = 0; r < n; ++r) {
2955     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2956       a[r]   = diagA[r];
2957       idx[r] = cstart + diagIdx[r];
2958     } else {
2959       a[r]   = offdiagA[r];
2960       idx[r] = cmap[offdiagIdx[r]];
2961     }
2962   }
2963   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2964   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2965   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2966   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2967   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2968   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2974 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2975 {
2976   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2977   PetscInt       n      = A->rmap->n;
2978   PetscInt       cstart = A->cmap->rstart;
2979   PetscInt       *cmap  = mat->garray;
2980   PetscInt       *diagIdx, *offdiagIdx;
2981   Vec            diagV, offdiagV;
2982   PetscScalar    *a, *diagA, *offdiagA;
2983   PetscInt       r;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2988   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2990   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2991   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2992   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2993   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2994   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2995   for (r = 0; r < n; ++r) {
2996     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2997       a[r]   = diagA[r];
2998       idx[r] = cstart + diagIdx[r];
2999     } else {
3000       a[r]   = offdiagA[r];
3001       idx[r] = cmap[offdiagIdx[r]];
3002     }
3003   }
3004   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3005   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3007   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3008   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3009   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3015 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3016 {
3017   PetscErrorCode ierr;
3018   Mat            *dummy;
3019 
3020   PetscFunctionBegin;
3021   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3022   *newmat = *dummy;
3023   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 #undef __FUNCT__
3028 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3029 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3030 {
3031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3032   PetscErrorCode ierr;
3033 
3034   PetscFunctionBegin;
3035   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3036   PetscFunctionReturn(0);
3037 }
3038 
3039 #undef __FUNCT__
3040 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3041 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3042 {
3043   PetscErrorCode ierr;
3044   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3045 
3046   PetscFunctionBegin;
3047   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3048   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3049   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3050   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 /* -------------------------------------------------------------------*/
3055 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3056                                        MatGetRow_MPIAIJ,
3057                                        MatRestoreRow_MPIAIJ,
3058                                        MatMult_MPIAIJ,
3059                                 /* 4*/ MatMultAdd_MPIAIJ,
3060                                        MatMultTranspose_MPIAIJ,
3061                                        MatMultTransposeAdd_MPIAIJ,
3062 #if defined(PETSC_HAVE_PBGL)
3063                                        MatSolve_MPIAIJ,
3064 #else
3065                                        0,
3066 #endif
3067                                        0,
3068                                        0,
3069                                 /*10*/ 0,
3070                                        0,
3071                                        0,
3072                                        MatSOR_MPIAIJ,
3073                                        MatTranspose_MPIAIJ,
3074                                 /*15*/ MatGetInfo_MPIAIJ,
3075                                        MatEqual_MPIAIJ,
3076                                        MatGetDiagonal_MPIAIJ,
3077                                        MatDiagonalScale_MPIAIJ,
3078                                        MatNorm_MPIAIJ,
3079                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3080                                        MatAssemblyEnd_MPIAIJ,
3081                                        MatSetOption_MPIAIJ,
3082                                        MatZeroEntries_MPIAIJ,
3083                                 /*24*/ MatZeroRows_MPIAIJ,
3084                                        0,
3085 #if defined(PETSC_HAVE_PBGL)
3086                                        0,
3087 #else
3088                                        0,
3089 #endif
3090                                        0,
3091                                        0,
3092                                 /*29*/ MatSetUp_MPIAIJ,
3093 #if defined(PETSC_HAVE_PBGL)
3094                                        0,
3095 #else
3096                                        0,
3097 #endif
3098                                        0,
3099                                        0,
3100                                        0,
3101                                 /*34*/ MatDuplicate_MPIAIJ,
3102                                        0,
3103                                        0,
3104                                        0,
3105                                        0,
3106                                 /*39*/ MatAXPY_MPIAIJ,
3107                                        MatGetSubMatrices_MPIAIJ,
3108                                        MatIncreaseOverlap_MPIAIJ,
3109                                        MatGetValues_MPIAIJ,
3110                                        MatCopy_MPIAIJ,
3111                                 /*44*/ MatGetRowMax_MPIAIJ,
3112                                        MatScale_MPIAIJ,
3113                                        0,
3114                                        0,
3115                                        MatZeroRowsColumns_MPIAIJ,
3116                                 /*49*/ MatSetRandom_MPIAIJ,
3117                                        0,
3118                                        0,
3119                                        0,
3120                                        0,
3121                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3122                                        0,
3123                                        MatSetUnfactored_MPIAIJ,
3124                                        MatPermute_MPIAIJ,
3125                                        0,
3126                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3127                                        MatDestroy_MPIAIJ,
3128                                        MatView_MPIAIJ,
3129                                        0,
3130                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3131                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3132                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3133                                        0,
3134                                        0,
3135                                        0,
3136                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3137                                        MatGetRowMinAbs_MPIAIJ,
3138                                        0,
3139                                        MatSetColoring_MPIAIJ,
3140                                        0,
3141                                        MatSetValuesAdifor_MPIAIJ,
3142                                 /*75*/ MatFDColoringApply_AIJ,
3143                                        0,
3144                                        0,
3145                                        0,
3146                                        MatFindZeroDiagonals_MPIAIJ,
3147                                 /*80*/ 0,
3148                                        0,
3149                                        0,
3150                                 /*83*/ MatLoad_MPIAIJ,
3151                                        0,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                        0,
3156                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3157                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3158                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3159                                        MatPtAP_MPIAIJ_MPIAIJ,
3160                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3161                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3162                                        0,
3163                                        0,
3164                                        0,
3165                                        0,
3166                                 /*99*/ 0,
3167                                        0,
3168                                        0,
3169                                        MatConjugate_MPIAIJ,
3170                                        0,
3171                                 /*104*/MatSetValuesRow_MPIAIJ,
3172                                        MatRealPart_MPIAIJ,
3173                                        MatImaginaryPart_MPIAIJ,
3174                                        0,
3175                                        0,
3176                                 /*109*/0,
3177                                        MatGetRedundantMatrix_MPIAIJ,
3178                                        MatGetRowMin_MPIAIJ,
3179                                        0,
3180                                        0,
3181                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3182                                        0,
3183                                        0,
3184                                        0,
3185                                        0,
3186                                 /*119*/0,
3187                                        0,
3188                                        0,
3189                                        0,
3190                                        MatGetMultiProcBlock_MPIAIJ,
3191                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3192                                        MatGetColumnNorms_MPIAIJ,
3193                                        MatInvertBlockDiagonal_MPIAIJ,
3194                                        0,
3195                                        MatGetSubMatricesParallel_MPIAIJ,
3196                                 /*129*/0,
3197                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3198                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3199                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3200                                        0,
3201                                 /*134*/0,
3202                                        0,
3203                                        0,
3204                                        0,
3205                                        0,
3206                                 /*139*/0,
3207                                        0,
3208                                        0,
3209                                        MatFDColoringSetUp_MPIXAIJ
3210 };
3211 
3212 /* ----------------------------------------------------------------------------------------*/
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3216 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3217 {
3218   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3223   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3229 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3230 {
3231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3232   PetscErrorCode ierr;
3233 
3234   PetscFunctionBegin;
3235   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3236   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3242 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3243 {
3244   Mat_MPIAIJ     *b;
3245   PetscErrorCode ierr;
3246 
3247   PetscFunctionBegin;
3248   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3249   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3250   b = (Mat_MPIAIJ*)B->data;
3251 
3252   if (!B->preallocated) {
3253     /* Explicitly create 2 MATSEQAIJ matrices. */
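         /* b->A holds the square "diagonal" block (local rows x local columns); b->B
            holds the "off-diagonal" block, created with the full global column width
            and compressed during assembly to the columns actually used (see garray) */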
3254     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3255     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3256     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3257     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3258     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3259     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3260     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3261     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3262     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3263     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3264   }
3265 
3266   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3267   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3268   B->preallocated = PETSC_TRUE;
3269   PetscFunctionReturn(0);
3270 }
3271 
3272 #undef __FUNCT__
3273 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3274 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3275 {
3276   Mat            mat;
3277   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3278   PetscErrorCode ierr;
3279 
3280   PetscFunctionBegin;
3281   *newmat = 0;
3282   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3283   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3284   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3285   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3286   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3287   a       = (Mat_MPIAIJ*)mat->data;
3288 
3289   mat->factortype   = matin->factortype;
3290   mat->assembled    = PETSC_TRUE;
3291   mat->insertmode   = NOT_SET_VALUES;
3292   mat->preallocated = PETSC_TRUE;
3293 
3294   a->size         = oldmat->size;
3295   a->rank         = oldmat->rank;
3296   a->donotstash   = oldmat->donotstash;
3297   a->roworiented  = oldmat->roworiented;
3298   a->rowindices   = 0;
3299   a->rowvalues    = 0;
3300   a->getrowactive = PETSC_FALSE;
3301 
3302   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3303   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3304 
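       /* colmap maps global column numbers to locations in the off-diagonal block;
          it is stored as a PetscTable or a dense array, depending on whether
          PETSC_USE_CTABLE is defined */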
3305   if (oldmat->colmap) {
3306 #if defined(PETSC_USE_CTABLE)
3307     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3308 #else
3309     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3310     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3311     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3312 #endif
3313   } else a->colmap = 0;
3314   if (oldmat->garray) {
3315     PetscInt len;
3316     len  = oldmat->B->cmap->n;
3317     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3318     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3319     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3320   } else a->garray = 0;
3321 
3322   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3323   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3324   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3325   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3326   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3327   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3328   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3329   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3330   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3331   *newmat = mat;
3332   PetscFunctionReturn(0);
3333 }
3334 
3335 
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatLoad_MPIAIJ"
3339 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3340 {
3341   PetscScalar    *vals,*svals;
3342   MPI_Comm       comm;
3343   PetscErrorCode ierr;
3344   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3345   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3346   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3347   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3348   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3349   int            fd;
3350   PetscInt       bs = 1;
3351 
3352   PetscFunctionBegin;
3353   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3354   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3355   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3356   if (!rank) {
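         /* only process 0 reads the file; the 4-entry header is
            (MAT_FILE_CLASSID, M = global rows, N = global columns, total nonzeros) */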
3357     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3358     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3359     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3360   }
3361   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3362   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3363   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3364   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3365 
3366   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3367 
3368   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3369   M    = header[1]; N = header[2];
3370   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3371   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3372   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3373 
3374   /* If global sizes are set, check if they are consistent with that given in the file */
3375   if (sizesset) {
3376     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3377   }
3378   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3379   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3380 
3381   /* determine ownership of all (block) rows */
3382   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3383   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3384   else m = newMat->rmap->n; /* Set by user */
3385 
3386   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3387   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3388 
3389   /* First process needs enough room for process with most rows */
3390   if (!rank) {
3391     mmax = rowners[1];
3392     for (i=2; i<=size; i++) {
3393       mmax = PetscMax(mmax, rowners[i]);
3394     }
3395   } else mmax = -1;             /* unused, but compilers complain */
3396 
3397   rowners[0] = 0;
3398   for (i=2; i<=size; i++) {
3399     rowners[i] += rowners[i-1];
3400   }
3401   rstart = rowners[rank];
3402   rend   = rowners[rank+1];
3403 
3404   /* distribute row lengths to all processors */
3405   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3406   if (!rank) {
3407     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3408     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3409     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3410     for (j=0; j<m; j++) {
3411       procsnz[0] += ourlens[j];
3412     }
3413     for (i=1; i<size; i++) {
3414       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3415       /* calculate the number of nonzeros on each processor */
3416       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3417         procsnz[i] += rowlengths[j];
3418       }
3419       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3420     }
3421     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3422   } else {
3423     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3424   }
3425 
3426   if (!rank) {
3427     /* determine max buffer needed and allocate it */
3428     maxnz = 0;
3429     for (i=0; i<size; i++) {
3430       maxnz = PetscMax(maxnz,procsnz[i]);
3431     }
3432     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3433 
3434     /* read in my part of the matrix column indices  */
3435     nz   = procsnz[0];
3436     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3437     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3438 
3439     /* read in everyone else's parts and ship them off */
3440     for (i=1; i<size; i++) {
3441       nz   = procsnz[i];
3442       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3443       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3444     }
3445     ierr = PetscFree(cols);CHKERRQ(ierr);
3446   } else {
3447     /* determine buffer space needed for message */
3448     nz = 0;
3449     for (i=0; i<m; i++) {
3450       nz += ourlens[i];
3451     }
3452     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3453 
3454     /* receive message of column indices */
3455     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3456   }
3457 
3458   /* determine column ownership if matrix is not square */
3459   if (N != M) {
3460     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3461     else n = newMat->cmap->n;
3462     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3463     cstart = cend - n;
3464   } else {
3465     cstart = rstart;
3466     cend   = rend;
3467     n      = cend - cstart;
3468   }
3469 
3470   /* loop over local rows, determining number of off diagonal entries */
3471   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3472   jj   = 0;
3473   for (i=0; i<m; i++) {
3474     for (j=0; j<ourlens[i]; j++) {
3475       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3476       jj++;
3477     }
3478   }
3479 
3480   for (i=0; i<m; i++) {
3481     ourlens[i] -= offlens[i];
3482   }
3483   if (!sizesset) {
3484     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3485   }
3486 
3487   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3488 
3489   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3490 
3491   for (i=0; i<m; i++) {
3492     ourlens[i] += offlens[i];
3493   }
3494 
3495   if (!rank) {
3496     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3497 
3498     /* read in my part of the matrix numerical values  */
3499     nz   = procsnz[0];
3500     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3501 
3502     /* insert into matrix */
3503     jj      = rstart;
3504     smycols = mycols;
3505     svals   = vals;
3506     for (i=0; i<m; i++) {
3507       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3508       smycols += ourlens[i];
3509       svals   += ourlens[i];
3510       jj++;
3511     }
3512 
3513     /* read in other processors and ship out */
3514     for (i=1; i<size; i++) {
3515       nz   = procsnz[i];
3516       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3517       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3518     }
3519     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3520   } else {
3521     /* receive numeric values */
3522     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3523 
3524     /* receive message of values */
3525     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3526 
3527     /* insert into matrix */
3528     jj      = rstart;
3529     smycols = mycols;
3530     svals   = vals;
3531     for (i=0; i<m; i++) {
3532       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3533       smycols += ourlens[i];
3534       svals   += ourlens[i];
3535       jj++;
3536     }
3537   }
3538   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3539   ierr = PetscFree(vals);CHKERRQ(ierr);
3540   ierr = PetscFree(mycols);CHKERRQ(ierr);
3541   ierr = PetscFree(rowners);CHKERRQ(ierr);
3542   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3543   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3544   PetscFunctionReturn(0);
3545 }
3546 
3547 #undef __FUNCT__
3548 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3549 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3550 {
3551   PetscErrorCode ierr;
3552   IS             iscol_local;
3553   PetscInt       csize;
3554 
3555   PetscFunctionBegin;
3556   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
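       /* the initial call gathers the column IS onto every process and stashes it on
          the new matrix under "ISAllGather"; a MAT_REUSE_MATRIX call retrieves it
          from there */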
3557   if (call == MAT_REUSE_MATRIX) {
3558     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3559     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3560   } else {
3561     PetscInt cbs;
3562     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3563     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3564     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3565   }
3566   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3567   if (call == MAT_INITIAL_MATRIX) {
3568     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3569     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3570   }
3571   PetscFunctionReturn(0);
3572 }
3573 
3574 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3575 #undef __FUNCT__
3576 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3577 /*
3578     Not great since it makes two copies of the submatrix: first a SeqAIJ
3579   locally, and then the end result by concatenating the local matrices.
3580   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3581 
3582   Note: This requires a sequential iscol with all indices.
3583 */
3584 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3585 {
3586   PetscErrorCode ierr;
3587   PetscMPIInt    rank,size;
3588   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3589   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3590   PetscBool      allcolumns, colflag;
3591   Mat            M,Mreuse;
3592   MatScalar      *vwork,*aa;
3593   MPI_Comm       comm;
3594   Mat_SeqAIJ     *aij;
3595 
3596   PetscFunctionBegin;
3597   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3598   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3599   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3600 
3601   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3602   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3603   if (colflag && ncol == mat->cmap->N) {
3604     allcolumns = PETSC_TRUE;
3605   } else {
3606     allcolumns = PETSC_FALSE;
3607   }
3608   if (call ==  MAT_REUSE_MATRIX) {
3609     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3610     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3611     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3612   } else {
3613     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3614   }
3615 
3616   /*
3617       m - number of local rows
3618       n - number of columns (same on all processors)
3619       rstart - first row in new global matrix generated
3620   */
3621   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3622   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3623   if (call == MAT_INITIAL_MATRIX) {
3624     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3625     ii  = aij->i;
3626     jj  = aij->j;
3627 
3628     /*
3629         Determine the number of non-zeros in the diagonal and off-diagonal
3630         portions of the matrix in order to do correct preallocation
3631     */
3632 
3633     /* first get start and end of "diagonal" columns */
3634     if (csize == PETSC_DECIDE) {
3635       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3636       if (mglobal == n) { /* square matrix */
3637         nlocal = m;
3638       } else {
3639         nlocal = n/size + ((n % size) > rank);
3640       }
3641     } else {
3642       nlocal = csize;
3643     }
3644     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3645     rstart = rend - nlocal;
3646     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3647 
3648     /* next, compute all the lengths */
3649     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3650     olens = dlens + m;
3651     for (i=0; i<m; i++) {
3652       jend = ii[i+1] - ii[i];
3653       olen = 0;
3654       dlen = 0;
3655       for (j=0; j<jend; j++) {
3656         if (*jj < rstart || *jj >= rend) olen++;
3657         else dlen++;
3658         jj++;
3659       }
3660       olens[i] = olen;
3661       dlens[i] = dlen;
3662     }
3663     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3664     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3665     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3666     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3667     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3668     ierr = PetscFree(dlens);CHKERRQ(ierr);
3669   } else {
3670     PetscInt ml,nl;
3671 
3672     M    = *newmat;
3673     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3674     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3675     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3676     /*
3677          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3678        rather than the slower MatSetValues().
3679     */
3680     M->was_assembled = PETSC_TRUE;
3681     M->assembled     = PETSC_FALSE;
3682   }
3683   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3684   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3685   ii   = aij->i;
3686   jj   = aij->j;
3687   aa   = aij->a;
3688   for (i=0; i<m; i++) {
3689     row   = rstart + i;
3690     nz    = ii[i+1] - ii[i];
3691     cwork = jj;     jj += nz;
3692     vwork = aa;     aa += nz;
3693     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3694   }
3695 
3696   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3697   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3698   *newmat = M;
3699 
3700   /* save submatrix used in processor for next request */
3701   if (call ==  MAT_INITIAL_MATRIX) {
3702     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3703     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3704   }
3705   PetscFunctionReturn(0);
3706 }
3707 
3708 #undef __FUNCT__
3709 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3710 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3711 {
3712   PetscInt       m,cstart, cend,j,nnz,i,d;
3713   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3714   const PetscInt *JJ;
3715   PetscScalar    *values;
3716   PetscErrorCode ierr;
3717 
3718   PetscFunctionBegin;
3719   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3720 
3721   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3722   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3723   m      = B->rmap->n;
3724   cstart = B->cmap->rstart;
3725   cend   = B->cmap->rend;
3726   rstart = B->rmap->rstart;
3727 
3728   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3729 
3730 #if defined(PETSC_USE_DEBUG)
3731   for (i=0; i<m; i++) {
3732     nnz = Ii[i+1]- Ii[i];
3733     JJ  = J + Ii[i];
3734     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3735     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3736     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3737   }
3738 #endif
3739 
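       /* for each local row, count the entries whose global column lies in
          [cstart,cend): these belong to the diagonal block (d_nnz); the remainder
          are off-diagonal (o_nnz) */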
3740   for (i=0; i<m; i++) {
3741     nnz     = Ii[i+1]- Ii[i];
3742     JJ      = J + Ii[i];
3743     nnz_max = PetscMax(nnz_max,nnz);
3744     d       = 0;
3745     for (j=0; j<nnz; j++) {
3746       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3747     }
3748     d_nnz[i] = d;
3749     o_nnz[i] = nnz - d;
3750   }
3751   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3752   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3753 
3754   if (v) values = (PetscScalar*)v;
3755   else {
3756     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3757   }
3758 
3759   for (i=0; i<m; i++) {
3760     ii   = i + rstart;
3761     nnz  = Ii[i+1]- Ii[i];
3762     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3763   }
3764   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3766 
3767   if (!v) {
3768     ierr = PetscFree(values);CHKERRQ(ierr);
3769   }
3770   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3771   PetscFunctionReturn(0);
3772 }
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3776 /*@
3777    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3778    (the default parallel PETSc format).
3779 
3780    Collective on MPI_Comm
3781 
3782    Input Parameters:
3783 +  B - the matrix
3784 .  i - the indices into j for the start of each local row (starts with zero)
3785 .  j - the column indices for each local row (starts with zero)
3786 -  v - optional values in the matrix
3787 
3788    Level: developer
3789 
3790    Notes:
3791        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3792      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3793      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3794 
3795        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3796 
3797        The format used for the sparse matrix input is equivalent to a
3798     row-major ordering, i.e., for the following matrix the expected input data is
3799     as shown:
3800 
3801         1 0 0
3802         2 0 3     P0
3803        -------
3804         4 5 6     P1
3805 
3806      Process0 [P0]: rows_owned=[0,1]
3807         i =  {0,1,3}  [size = nrow+1  = 2+1]
3808         j =  {0,0,2}  [size = nz = 3]
3809         v =  {1,2,3}  [size = nz = 3]
3810 
3811      Process1 [P1]: rows_owned=[2]
3812         i =  {0,3}    [size = nrow+1  = 1+1]
3813         j =  {0,1,2}  [size = nz = 3]
3814         v =  {4,5,6}  [size = nz = 3]
3815 
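          A minimal call sequence, as a sketch (m,n are the local and M,N the global
          sizes, and i,j,v are the local CSR arrays described above):

     .vb
          MatCreate(comm,&B);
          MatSetSizes(B,m,n,M,N);
          MatSetType(B,MATMPIAIJ);
          MatMPIAIJSetPreallocationCSR(B,i,j,v);
     .ve
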
3816 .keywords: matrix, aij, compressed row, sparse, parallel
3817 
3818 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3819           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3820 @*/
3821 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3822 {
3823   PetscErrorCode ierr;
3824 
3825   PetscFunctionBegin;
3826   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3827   PetscFunctionReturn(0);
3828 }
3829 
3830 #undef __FUNCT__
3831 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3832 /*@C
3833    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3834    (the default parallel PETSc format).  For good matrix assembly performance
3835    the user should preallocate the matrix storage by setting the parameters
3836    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3837    performance can be increased by more than a factor of 50.
3838 
3839    Collective on MPI_Comm
3840 
3841    Input Parameters:
3842 +  B - the matrix
3843 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3844            (same value is used for all local rows)
3845 .  d_nnz - array containing the number of nonzeros in the various rows of the
3846            DIAGONAL portion of the local submatrix (possibly different for each row)
3847            or NULL, if d_nz is used to specify the nonzero structure.
3848            The size of this array is equal to the number of local rows, i.e 'm'.
3849            For matrices that will be factored, you must leave room for (and set)
3850            the diagonal entry even if it is zero.
3851 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3852            submatrix (same value is used for all local rows).
3853 -  o_nnz - array containing the number of nonzeros in the various rows of the
3854            OFF-DIAGONAL portion of the local submatrix (possibly different for
3855            each row) or NULL, if o_nz is used to specify the nonzero
3856            structure. The size of this array is equal to the number
3857            of local rows, i.e., 'm'.
3858 
3859    If the *_nnz parameter is given then the *_nz parameter is ignored.
3860 
3861    The AIJ format (also called the Yale sparse matrix format or
3862    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3863    storage.  The stored row and column indices begin with zero.
3864    See Users-Manual: ch_mat for details.
3865 
3866    The parallel matrix is partitioned such that the first m0 rows belong to
3867    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3868    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3869 
3870    The DIAGONAL portion of the local submatrix of a processor can be defined
3871    as the submatrix which is obtained by extracting the part corresponding to
3872    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3873    first row that belongs to the processor, r2 is the last row belonging to
3874    this processor, and c1-c2 is the range of indices of the local part of a
3875    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3876    common case of a square matrix, the row and column ranges are the same and
3877    the DIAGONAL part is also square. The remaining portion of the local
3878    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3879 
3880    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3881 
3882    You can call MatGetInfo() to get information on how effective the preallocation was;
3883    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3884    You can also run with the option -info and look for messages with the string
3885    malloc in them to see if additional memory allocation was needed.
3886 
3887    Example usage:
3888 
3889    Consider the following 8x8 matrix with 34 non-zero values that is
3890    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3891    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3892    as follows:
3893 
3894 .vb
3895             1  2  0  |  0  3  0  |  0  4
3896     Proc0   0  5  6  |  7  0  0  |  8  0
3897             9  0 10  | 11  0  0  | 12  0
3898     -------------------------------------
3899            13  0 14  | 15 16 17  |  0  0
3900     Proc1   0 18  0  | 19 20 21  |  0  0
3901             0  0  0  | 22 23  0  | 24  0
3902     -------------------------------------
3903     Proc2  25 26 27  |  0  0 28  | 29  0
3904            30  0  0  | 31 32 33  |  0 34
3905 .ve
3906 
3907    This can be represented as a collection of submatrices as:
3908 
3909 .vb
3910       A B C
3911       D E F
3912       G H I
3913 .ve
3914 
3915    Where the submatrices A,B,C are owned by proc0, D,E,F are
3916    owned by proc1, G,H,I are owned by proc2.
3917 
3918    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3919    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3920    The 'M','N' parameters are 8,8, and have the same values on all procs.
3921 
3922    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3923    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3924    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3925    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3926    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3927    matrix, and [DF] as another SeqAIJ matrix.
3928 
3929    When d_nz, o_nz parameters are specified, d_nz storage elements are
3930    allocated for every row of the local diagonal submatrix, and o_nz
3931    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3932    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3933    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3934    In this case, the values of d_nz,o_nz are:
3935 .vb
3936      proc0 : d_nz = 2, o_nz = 2
3937      proc1 : d_nz = 3, o_nz = 2
3938      proc2 : d_nz = 1, o_nz = 4
3939 .ve
3940    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3941    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3942    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3943    34 values.
3944 
3945    When d_nnz, o_nnz parameters are specified, the storage is specified
3946    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3947    In the above case the values for d_nnz,o_nnz are:
3948 .vb
3949      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3950      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3951      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3952 .ve
3953    Here the space allocated is the sum of all the above values, i.e., 34, and
3954    hence the preallocation is perfect.
3955 
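   A hedged sketch of the per-row form for proc0 of the example above; it
   assumes comm has exactly three ranks and that this code runs on rank 0:

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
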
3956    Level: intermediate
3957 
3958 .keywords: matrix, aij, compressed row, sparse, parallel
3959 
3960 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3961           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3962 @*/
3963 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3964 {
3965   PetscErrorCode ierr;
3966 
3967   PetscFunctionBegin;
3968   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3969   PetscValidType(B,1);
3970   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3971   PetscFunctionReturn(0);
3972 }
3973 
3974 #undef __FUNCT__
3975 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3976 /*@
3977      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3978          the local rows in standard CSR format.
3979 
3980    Collective on MPI_Comm
3981 
3982    Input Parameters:
3983 +  comm - MPI communicator
3984 .  m - number of local rows (Cannot be PETSC_DECIDE)
3985 .  n - This value should be the same as the local size used in creating the
3986        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3987        it calculated if N is given). For square matrices n is almost always m.
3988 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3989 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3990 .   i - row indices
3991 .   j - column indices
3992 -   a - matrix values
3993 
3994    Output Parameter:
3995 .   mat - the matrix
3996 
3997    Level: intermediate
3998 
3999    Notes:
4000        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4001      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4002      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4003 
4004        The i and j indices are 0 based, and the i array contains offsets into the local j array.
4005 
4006        The format used for the sparse matrix input is equivalent to a
4007     row-major ordering, i.e., for the following matrix, the input data expected is
4008     as shown:
4009 
4010         1 0 0
4011         2 0 3     P0
4012        -------
4013         4 5 6     P1
4014 
4015      Process0 [P0]: rows_owned=[0,1]
4016         i =  {0,1,3}  [size = nrow+1  = 2+1]
4017         j =  {0,0,2}  [size = nz = 3]
4018         v =  {1,2,3}  [size = nz = 3]
4019 
4020      Process1 [P1]: rows_owned=[2]
4021         i =  {0,3}    [size = nrow+1  = 1+1]
4022         j =  {0,1,2}  [size = nz = 3]
4023         v =  {4,5,6}  [size = nz = 3]
4024 
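     A hedged sketch assembling the 3x3 example above on a two-rank
     communicator, shown for rank 0 (which owns the first two rows):

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
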
4025 .keywords: matrix, aij, compressed row, sparse, parallel
4026 
4027 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4028           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4029 @*/
4030 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4031 {
4032   PetscErrorCode ierr;
4033 
4034   PetscFunctionBegin;
4035   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4036   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4037   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4038   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4039   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4040   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4041   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4042   PetscFunctionReturn(0);
4043 }
4044 
4045 #undef __FUNCT__
4046 #define __FUNCT__ "MatCreateAIJ"
4047 /*@C
4048    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4049    (the default parallel PETSc format).  For good matrix assembly performance
4050    the user should preallocate the matrix storage by setting the parameters
4051    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4052    performance can be increased by more than a factor of 50.
4053 
4054    Collective on MPI_Comm
4055 
4056    Input Parameters:
4057 +  comm - MPI communicator
4058 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4059            This value should be the same as the local size used in creating the
4060            y vector for the matrix-vector product y = Ax.
4061 .  n - This value should be the same as the local size used in creating the
4062        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4063        it calculated if N is given). For square matrices n is almost always m.
4064 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4065 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4066 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4067            (same value is used for all local rows)
4068 .  d_nnz - array containing the number of nonzeros in the various rows of the
4069            DIAGONAL portion of the local submatrix (possibly different for each row)
4070            or NULL, if d_nz is used to specify the nonzero structure.
4071            The size of this array is equal to the number of local rows, i.e., 'm'.
4072 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4073            submatrix (same value is used for all local rows).
4074 -  o_nnz - array containing the number of nonzeros in the various rows of the
4075            OFF-DIAGONAL portion of the local submatrix (possibly different for
4076            each row) or NULL, if o_nz is used to specify the nonzero
4077            structure. The size of this array is equal to the number
4078            of local rows, i.e., 'm'.
4079 
4080    Output Parameter:
4081 .  A - the matrix
4082 
4083    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4084    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4085    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4086 
4087    Notes:
4088    If the *_nnz parameter is given then the *_nz parameter is ignored.
4089 
4090    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4091    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4092    storage requirements for this matrix.
4093 
4094    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4095    processor then it must be used on all processors that share the object for
4096    that argument.
4097 
4098    The user MUST specify either the local or global matrix dimensions
4099    (possibly both).
4100 
4101    The parallel matrix is partitioned across processors such that the
4102    first m0 rows belong to process 0, the next m1 rows belong to
4103    process 1, the next m2 rows belong to process 2, etc., where
4104    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4105    values corresponding to an [m x N] submatrix.
4106 
4107    The columns are logically partitioned with the n0 columns belonging
4108    to the 0th partition, the next n1 columns belonging to the next
4109    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4110 
4111    The DIAGONAL portion of the local submatrix on any given processor
4112    is the submatrix corresponding to the rows and columns m,n
4113    owned by the given processor, i.e., the diagonal matrix on
4114    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4115    etc. The remaining portion of the local submatrix [m x (N-n)]
4116    constitutes the OFF-DIAGONAL portion. The example below better
4117    illustrates this concept.
4118 
4119    For a square global matrix we define each processor's diagonal portion
4120    to be its local rows and the corresponding columns (a square submatrix);
4121    each processor's off-diagonal portion encompasses the remainder of the
4122    local matrix (a rectangular submatrix).
4123 
4124    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4125 
4126    When calling this routine with a single process communicator, a matrix of
4127    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4128    type of communicator, use the construction mechanism:
4129      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4130 
4131    By default, this format uses inodes (identical nodes) when possible.
4132    We search for consecutive rows with the same nonzero structure, thereby
4133    reusing matrix information to achieve increased efficiency.
4134 
4135    Options Database Keys:
4136 +  -mat_no_inode  - Do not use inodes
4137 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4138 -  -mat_aij_oneindex - Internally use indexing starting at 1
4139         rather than 0.  Note that when calling MatSetValues(),
4140         the user still MUST index entries starting at 0!
4141 
4142 
4143    Example usage:
4144 
4145    Consider the following 8x8 matrix with 34 non-zero values that is
4146    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4147    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4148    as follows:
4149 
4150 .vb
4151             1  2  0  |  0  3  0  |  0  4
4152     Proc0   0  5  6  |  7  0  0  |  8  0
4153             9  0 10  | 11  0  0  | 12  0
4154     -------------------------------------
4155            13  0 14  | 15 16 17  |  0  0
4156     Proc1   0 18  0  | 19 20 21  |  0  0
4157             0  0  0  | 22 23  0  | 24  0
4158     -------------------------------------
4159     Proc2  25 26 27  |  0  0 28  | 29  0
4160            30  0  0  | 31 32 33  |  0 34
4161 .ve
4162 
4163    This can be represented as a collection of submatrices as:
4164 
4165 .vb
4166       A B C
4167       D E F
4168       G H I
4169 .ve
4170 
4171    Where the submatrices A,B,C are owned by proc0, D,E,F are
4172    owned by proc1, G,H,I are owned by proc2.
4173 
4174    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4175    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4176    The 'M','N' parameters are 8,8, and have the same values on all procs.
4177 
4178    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4179    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4180    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4181    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4182    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4183    matrix, and [DF] as another SeqAIJ matrix.
4184 
4185    When d_nz, o_nz parameters are specified, d_nz storage elements are
4186    allocated for every row of the local diagonal submatrix, and o_nz
4187    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4188    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4189    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4190    In this case, the values of d_nz,o_nz are:
4191 .vb
4192      proc0 : d_nz = 2, o_nz = 2
4193      proc1 : d_nz = 3, o_nz = 2
4194      proc2 : d_nz = 1, o_nz = 4
4195 .ve
4196    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4197    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4198    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4199    34 values.
4200 
4201    When d_nnz, o_nnz parameters are specified, the storage is specified
4202    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4203    In the above case the values for d_nnz,o_nnz are:
4204 .vb
4205      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4206      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4207      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4208 .ve
4209    Here the space allocated is the sum of all the above values, i.e., 34, and
4210    hence the preallocation is perfect.
4211 
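   A hedged sketch of the single-value form for the example above; PETSC_DECIDE
   lets PETSc split the 8 rows and columns 3/3/2 across the three ranks, and
   d_nz=3, o_nz=4 cover the worst row on any rank:

.vb
     Mat A;
     ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,3,NULL,4,NULL,&A);CHKERRQ(ierr);
.ve
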
4212    Level: intermediate
4213 
4214 .keywords: matrix, aij, compressed row, sparse, parallel
4215 
4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4217           MPIAIJ, MatCreateMPIAIJWithArrays()
4218 @*/
4219 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4220 {
4221   PetscErrorCode ierr;
4222   PetscMPIInt    size;
4223 
4224   PetscFunctionBegin;
4225   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4226   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4227   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4228   if (size > 1) {
4229     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4230     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4231   } else {
4232     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4233     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4234   }
4235   PetscFunctionReturn(0);
4236 }
4237 
4238 #undef __FUNCT__
4239 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4240 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4241 {
4242   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4243 
4244   PetscFunctionBegin;
4245   if (Ad)     *Ad     = a->A;
4246   if (Ao)     *Ao     = a->B;
4247   if (colmap) *colmap = a->garray;
4248   PetscFunctionReturn(0);
4249 }
4250 
4251 #undef __FUNCT__
4252 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4253 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4254 {
4255   PetscErrorCode ierr;
4256   PetscInt       i;
4257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4258 
4259   PetscFunctionBegin;
4260   if (coloring->ctype == IS_COLORING_GLOBAL) {
4261     ISColoringValue *allcolors,*colors;
4262     ISColoring      ocoloring;
4263 
4264     /* set coloring for diagonal portion */
4265     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4266 
4267     /* set coloring for off-diagonal portion */
4268     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4269     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4270     for (i=0; i<a->B->cmap->n; i++) {
4271       colors[i] = allcolors[a->garray[i]];
4272     }
4273     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4274     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4275     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4276     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4277   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4278     ISColoringValue *colors;
4279     PetscInt        *larray;
4280     ISColoring      ocoloring;
4281 
4282     /* set coloring for diagonal portion */
4283     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4284     for (i=0; i<a->A->cmap->n; i++) {
4285       larray[i] = i + A->cmap->rstart;
4286     }
4287     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4288     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4289     for (i=0; i<a->A->cmap->n; i++) {
4290       colors[i] = coloring->colors[larray[i]];
4291     }
4292     ierr = PetscFree(larray);CHKERRQ(ierr);
4293     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4294     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4295     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4296 
4297     /* set coloring for off-diagonal portion */
4298     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4299     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4300     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4301     for (i=0; i<a->B->cmap->n; i++) {
4302       colors[i] = coloring->colors[larray[i]];
4303     }
4304     ierr = PetscFree(larray);CHKERRQ(ierr);
4305     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4306     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4307     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4308   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4309   PetscFunctionReturn(0);
4310 }
4311 
4312 #undef __FUNCT__
4313 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4314 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4315 {
4316   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4317   PetscErrorCode ierr;
4318 
4319   PetscFunctionBegin;
4320   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4321   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 #undef __FUNCT__
4326 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4327 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4328 {
4329   PetscErrorCode ierr;
4330   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4331   PetscInt       *indx;
4332 
4333   PetscFunctionBegin;
4334   /* This routine will ONLY return MPIAIJ type matrix */
4335   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4336   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4337   if (n == PETSC_DECIDE) {
4338     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4339   }
4340   /* Check sum(n) = N */
4341   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4342   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4343 
4344   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4345   rstart -= m;
4346 
4347   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4348   for (i=0; i<m; i++) {
4349     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4350     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4351     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4352   }
4353 
4354   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4355   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4356   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4357   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4358   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4359   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4360   PetscFunctionReturn(0);
4361 }
4362 
4363 #undef __FUNCT__
4364 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4365 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4366 {
4367   PetscErrorCode ierr;
4368   PetscInt       m,N,i,rstart,nnz,Ii;
4369   PetscInt       *indx;
4370   PetscScalar    *values;
4371 
4372   PetscFunctionBegin;
4373   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4374   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4375   for (i=0; i<m; i++) {
4376     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4377     Ii   = i + rstart;
4378     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4379     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4380   }
4381   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   PetscFunctionReturn(0);
4384 }
4385 
4386 #undef __FUNCT__
4387 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4388 /*@
4389       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4390                  matrices from each processor
4391 
4392     Collective on MPI_Comm
4393 
4394    Input Parameters:
4395 +    comm - the communicator the parallel matrix will live on
4396 .    inmat - the input sequential matrix
4397 .    n - number of local columns (or PETSC_DECIDE)
4398 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4399 
4400    Output Parameter:
4401 .    outmat - the parallel matrix generated
4402 
4403     Level: advanced
4404 
4405    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4406 
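   A hedged one-line sketch (seqA is this rank's sequential piece; the rows of
   all pieces are stacked in rank order to form the parallel matrix C):

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
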
4407 @*/
4408 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4409 {
4410   PetscErrorCode ierr;
4411   PetscMPIInt    size;
4412 
4413   PetscFunctionBegin;
4414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4416   if (size == 1) {
4417     if (scall == MAT_INITIAL_MATRIX) {
4418       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4419     } else {
4420       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4421     }
4422   } else {
4423     if (scall == MAT_INITIAL_MATRIX) {
4424       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4425     }
4426     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4427   }
4428   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4429   PetscFunctionReturn(0);
4430 }
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "MatFileSplit"
4434 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4435 {
4436   PetscErrorCode    ierr;
4437   PetscMPIInt       rank;
4438   PetscInt          m,N,i,rstart,nnz;
4439   size_t            len;
4440   const PetscInt    *indx;
4441   PetscViewer       out;
4442   char              *name;
4443   Mat               B;
4444   const PetscScalar *values;
4445 
4446   PetscFunctionBegin;
4447   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4448   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4449   /* Should this be the type of the diagonal block of A? */
4450   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4451   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4452   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4453   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4454   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4455   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4456   for (i=0; i<m; i++) {
4457     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4458     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4459     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4460   }
4461   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463 
4464   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4465   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4466   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4467   sprintf(name,"%s.%d",outfile,rank);
4468   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4469   ierr = PetscFree(name);CHKERRQ(ierr);
4470   ierr = MatView(B,out);CHKERRQ(ierr);
4471   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4472   ierr = MatDestroy(&B);CHKERRQ(ierr);
4473   PetscFunctionReturn(0);
4474 }
4475 
4476 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4477 #undef __FUNCT__
4478 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4479 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4480 {
4481   PetscErrorCode      ierr;
4482   Mat_Merge_SeqsToMPI *merge;
4483   PetscContainer      container;
4484 
4485   PetscFunctionBegin;
4486   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4487   if (container) {
4488     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4501     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4502     ierr = PetscFree(merge);CHKERRQ(ierr);
4503     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4504   }
4505   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 #include <../src/mat/utils/freespace.h>
4510 #include <petscbt.h>
4511 
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4514 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4515 {
4516   PetscErrorCode      ierr;
4517   MPI_Comm            comm;
4518   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4519   PetscMPIInt         size,rank,taga,*len_s;
4520   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4521   PetscInt            proc,m;
4522   PetscInt            **buf_ri,**buf_rj;
4523   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4524   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4525   MPI_Request         *s_waits,*r_waits;
4526   MPI_Status          *status;
4527   MatScalar           *aa=a->a;
4528   MatScalar           **abuf_r,*ba_i;
4529   Mat_Merge_SeqsToMPI *merge;
4530   PetscContainer      container;
4531 
4532   PetscFunctionBegin;
4533   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4534   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4535 
4536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4537   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4538 
4539   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4540   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4541 
4542   bi     = merge->bi;
4543   bj     = merge->bj;
4544   buf_ri = merge->buf_ri;
4545   buf_rj = merge->buf_rj;
4546 
4547   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4548   owners = merge->rowmap->range;
4549   len_s  = merge->len_s;
4550 
4551   /* send and recv matrix values */
4552   /*-----------------------------*/
4553   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4554   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4555 
4556   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4557   for (proc=0,k=0; proc<size; proc++) {
4558     if (!len_s[proc]) continue;
4559     i    = owners[proc];
4560     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4561     k++;
4562   }
4563 
4564   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4565   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4566   ierr = PetscFree(status);CHKERRQ(ierr);
4567 
4568   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4569   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4570 
4571   /* insert mat values of mpimat */
4572   /*----------------------------*/
4573   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4574   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4575 
4576   for (k=0; k<merge->nrecv; k++) {
4577     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4578     nrows       = *(buf_ri_k[k]);
4579     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4580     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4581   }
4582 
4583   /* set values of ba */
4584   m = merge->rowmap->n;
4585   for (i=0; i<m; i++) {
4586     arow = owners[rank] + i;
4587     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4588     bnzi = bi[i+1] - bi[i];
4589     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4590 
4591     /* add local non-zero vals of this proc's seqmat into ba */
4592     anzi   = ai[arow+1] - ai[arow];
4593     aj     = a->j + ai[arow];
4594     aa     = a->a + ai[arow];
4595     nextaj = 0;
4596     for (j=0; nextaj<anzi; j++) {
4597       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4598         ba_i[j] += aa[nextaj++];
4599       }
4600     }
4601 
4602     /* add received vals into ba */
4603     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4604       /* i-th row */
4605       if (i == *nextrow[k]) {
4606         anzi   = *(nextai[k]+1) - *nextai[k];
4607         aj     = buf_rj[k] + *(nextai[k]);
4608         aa     = abuf_r[k] + *(nextai[k]);
4609         nextaj = 0;
4610         for (j=0; nextaj<anzi; j++) {
4611           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4612             ba_i[j] += aa[nextaj++];
4613           }
4614         }
4615         nextrow[k]++; nextai[k]++;
4616       }
4617     }
4618     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4619   }
4620   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622 
4623   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4624   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4625   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4626   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4627   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4628   PetscFunctionReturn(0);
4629 }
4630 
4631 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4635 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4636 {
4637   PetscErrorCode      ierr;
4638   Mat                 B_mpi;
4639   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4640   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4641   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4642   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4643   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4644   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4645   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4646   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4647   MPI_Status          *status;
4648   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4649   PetscBT             lnkbt;
4650   Mat_Merge_SeqsToMPI *merge;
4651   PetscContainer      container;
4652 
4653   PetscFunctionBegin;
4654   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4655 
4656   /* make sure it is a PETSc comm */
4657   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4659   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4660 
4661   ierr = PetscNew(&merge);CHKERRQ(ierr);
4662   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4663 
4664   /* determine row ownership */
4665   /*---------------------------------------------------------*/
4666   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4671   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4672   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4673 
4674   m      = merge->rowmap->n;
4675   owners = merge->rowmap->range;
4676 
4677   /* determine the number of messages to send, their lengths */
4678   /*---------------------------------------------------------*/
4679   len_s = merge->len_s;
4680 
4681   len          = 0; /* length of buf_si[] */
4682   merge->nsend = 0;
4683   for (proc=0; proc<size; proc++) {
4684     len_si[proc] = 0;
4685     if (proc == rank) {
4686       len_s[proc] = 0;
4687     } else {
4688       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4689       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4690     }
4691     if (len_s[proc]) {
4692       merge->nsend++;
4693       nrows = 0;
4694       for (i=owners[proc]; i<owners[proc+1]; i++) {
4695         if (ai[i+1] > ai[i]) nrows++;
4696       }
4697       len_si[proc] = 2*(nrows+1);
4698       len         += len_si[proc];
4699     }
4700   }
4701 
4702   /* determine the number and length of messages to receive for ij-structure */
4703   /*-------------------------------------------------------------------------*/
4704   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4705   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4706 
4707   /* post the Irecv of j-structure */
4708   /*-------------------------------*/
4709   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4710   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4711 
4712   /* post the Isend of j-structure */
4713   /*--------------------------------*/
4714   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4715 
4716   for (proc=0, k=0; proc<size; proc++) {
4717     if (!len_s[proc]) continue;
4718     i    = owners[proc];
4719     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4720     k++;
4721   }
4722 
4723   /* receives and sends of j-structure are complete */
4724   /*------------------------------------------------*/
4725   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4726   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4727 
4728   /* send and recv i-structure */
4729   /*---------------------------*/
4730   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4731   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4732 
4733   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4734   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4735   for (proc=0,k=0; proc<size; proc++) {
4736     if (!len_s[proc]) continue;
4737     /* form outgoing message for i-structure:
4738          buf_si[0]:                 nrows to be sent
4739                [1:nrows]:           row index (global)
4740                [nrows+1:2*nrows+1]: i-structure index
4741     */
4742     /*-------------------------------------------*/
4743     nrows       = len_si[proc]/2 - 1;
4744     buf_si_i    = buf_si + nrows+1;
4745     buf_si[0]   = nrows;
4746     buf_si_i[0] = 0;
4747     nrows       = 0;
4748     for (i=owners[proc]; i<owners[proc+1]; i++) {
4749       anzi = ai[i+1] - ai[i];
4750       if (anzi) {
4751         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4752         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4753         nrows++;
4754       }
4755     }
4756     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4757     k++;
4758     buf_si += len_si[proc];
4759   }
4760 
4761   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4762   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4763 
4764   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4765   for (i=0; i<merge->nrecv; i++) {
4766     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4767   }
4768 
4769   ierr = PetscFree(len_si);CHKERRQ(ierr);
4770   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4771   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4772   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4773   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4775   ierr = PetscFree(status);CHKERRQ(ierr);
4776 
4777   /* compute a local seq matrix in each processor */
4778   /*----------------------------------------------*/
4779   /* allocate bi array and free space for accumulating nonzero column info */
4780   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4781   bi[0] = 0;
4782 
4783   /* create and initialize a linked list */
4784   nlnk = N+1;
4785   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786 
4787   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4788   len  = ai[owners[rank+1]] - ai[owners[rank]];
4789   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4790 
4791   current_space = free_space;
4792 
4793   /* determine symbolic info for each local row */
4794   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4795 
4796   for (k=0; k<merge->nrecv; k++) {
4797     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4798     nrows       = *buf_ri_k[k];
4799     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4800     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4801   }
4802 
4803   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4804   len  = 0;
4805   for (i=0; i<m; i++) {
4806     bnzi = 0;
4807     /* add local non-zero cols of this proc's seqmat into lnk */
4808     arow  = owners[rank] + i;
4809     anzi  = ai[arow+1] - ai[arow];
4810     aj    = a->j + ai[arow];
4811     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4812     bnzi += nlnk;
4813     /* add received col data into lnk */
4814     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4815       if (i == *nextrow[k]) { /* i-th row */
4816         anzi  = *(nextai[k]+1) - *nextai[k];
4817         aj    = buf_rj[k] + *nextai[k];
4818         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4819         bnzi += nlnk;
4820         nextrow[k]++; nextai[k]++;
4821       }
4822     }
4823     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4824 
4825     /* if free space is not available, make more free space */
4826     if (current_space->local_remaining<bnzi) {
4827       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4828       nspacedouble++;
4829     }
4830     /* copy data into free space, then initialize lnk */
4831     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4832     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4833 
4834     current_space->array           += bnzi;
4835     current_space->local_used      += bnzi;
4836     current_space->local_remaining -= bnzi;
4837 
4838     bi[i+1] = bi[i] + bnzi;
4839   }
4840 
4841   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4842 
4843   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4844   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4845   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4846 
4847   /* create symbolic parallel matrix B_mpi */
4848   /*---------------------------------------*/
4849   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4850   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4851   if (n==PETSC_DECIDE) {
4852     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4853   } else {
4854     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4855   }
4856   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4857   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4858   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4859   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4860   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4861 
4862   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4863   B_mpi->assembled    = PETSC_FALSE;
4864   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4865   merge->bi           = bi;
4866   merge->bj           = bj;
4867   merge->buf_ri       = buf_ri;
4868   merge->buf_rj       = buf_rj;
4869   merge->coi          = NULL;
4870   merge->coj          = NULL;
4871   merge->owners_co    = NULL;
4872 
4873   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4874 
4875   /* attach the supporting struct to B_mpi for reuse */
4876   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4877   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4878   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4879   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4880   *mpimat = B_mpi;
4881 
4882   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4883   PetscFunctionReturn(0);
4884 }
4885 
4886 #undef __FUNCT__
4887 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4888 /*@C
4889       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4890                  matrices from each processor
4891 
4892     Collective on MPI_Comm
4893 
4894    Input Parameters:
4895 +    comm - the communicator the parallel matrix will live on
4896 .    seqmat - the input sequential matrix
4897 .    m - number of local rows (or PETSC_DECIDE)
4898 .    n - number of local columns (or PETSC_DECIDE)
4899 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4900 
4901    Output Parameter:
4902 .    mpimat - the parallel matrix generated
4903 
4904     Level: advanced
4905 
4906    Notes:
4907      The dimensions of the sequential matrix in each processor MUST be the same.
4908      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4909      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
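
     A hedged sketch; each rank contributes a sequential matrix of the full
     global dimensions, and overlapping entries are summed into the result:

.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve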
4910 @*/
4911 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4912 {
4913   PetscErrorCode ierr;
4914   PetscMPIInt    size;
4915 
4916   PetscFunctionBegin;
4917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4918   if (size == 1) {
4919     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920     if (scall == MAT_INITIAL_MATRIX) {
4921       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4922     } else {
4923       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4924     }
4925     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926     PetscFunctionReturn(0);
4927   }
4928   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4929   if (scall == MAT_INITIAL_MATRIX) {
4930     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4931   }
4932   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4933   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4934   PetscFunctionReturn(0);
4935 }
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4939 /*@
4940      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4941           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4942           with MatGetSize()
4943 
4944     Not Collective
4945 
4946    Input Parameters:
4947 +    A - the matrix
4948 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4949 
4950    Output Parameter:
4951 .    A_loc - the local sequential matrix generated
4952 
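   A hedged usage sketch; MAT_REUSE_MATRIX refreshes the values after A has
   changed (same nonzero pattern) without reallocating:

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... values of A change, nonzero pattern stays the same ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
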
4953     Level: developer
4954 
4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4956 
4957 @*/
4958 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4959 {
4960   PetscErrorCode ierr;
4961   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4962   Mat_SeqAIJ     *mat,*a,*b;
4963   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4964   MatScalar      *aa,*ba,*cam;
4965   PetscScalar    *ca;
4966   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4967   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4968   PetscBool      match;
4969 
4970   PetscFunctionBegin;
4971   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4972   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4973   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4974   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4975   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4976   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4977   aa = a->a; ba = b->a;
4978   if (scall == MAT_INITIAL_MATRIX) {
4979     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4980     ci[0] = 0;
4981     for (i=0; i<am; i++) {
4982       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4983     }
4984     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4985     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4986     k    = 0;
4987     for (i=0; i<am; i++) {
4988       ncols_o = bi[i+1] - bi[i];
4989       ncols_d = ai[i+1] - ai[i];
4990       /* off-diagonal portion of A */
4991       for (jo=0; jo<ncols_o; jo++) {
4992         col = cmap[*bj];
4993         if (col >= cstart) break;
4994         cj[k]   = col; bj++;
4995         ca[k++] = *ba++;
4996       }
4997       /* diagonal portion of A */
4998       for (j=0; j<ncols_d; j++) {
4999         cj[k]   = cstart + *aj++;
5000         ca[k++] = *aa++;
5001       }
5002       /* off-diagonal portion of A */
5003       for (j=jo; j<ncols_o; j++) {
5004         cj[k]   = cmap[*bj++];
5005         ca[k++] = *ba++;
5006       }
5007     }
5008     /* put together the new matrix */
5009     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5010     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5011     /* Since these are PETSc arrays, change flags to free them as necessary. */
5012     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5013     mat->free_a  = PETSC_TRUE;
5014     mat->free_ij = PETSC_TRUE;
5015     mat->nonew   = 0;
5016   } else if (scall == MAT_REUSE_MATRIX) {
5017     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5018     ci = mat->i; cj = mat->j; cam = mat->a;
5019     for (i=0; i<am; i++) {
5020       /* off-diagonal portion of A */
5021       ncols_o = bi[i+1] - bi[i];
5022       for (jo=0; jo<ncols_o; jo++) {
5023         col = cmap[*bj];
5024         if (col >= cstart) break;
5025         *cam++ = *ba++; bj++;
5026       }
5027       /* diagonal portion of A */
5028       ncols_d = ai[i+1] - ai[i];
5029       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5030       /* off-diagonal portion of A */
5031       for (j=jo; j<ncols_o; j++) {
5032         *cam++ = *ba++; bj++;
5033       }
5034     }
5035   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5036   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5037   PetscFunctionReturn(0);
5038 }
5039 
5040 #undef __FUNCT__
5041 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5042 /*@C
5043      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5044 
5045     Not Collective
5046 
5047    Input Parameters:
5048 +    A - the matrix
5049 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 -    row, col - index sets of rows and columns to extract (or NULL)
5051 
5052    Output Parameter:
5053 .    A_loc - the local sequential matrix generated
5054 
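   A hedged sketch with no row/column restriction; passing NULL for both index
   sets keeps all local rows and every nonzero column:

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve
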
5055     Level: developer
5056 
5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5058 
5059 @*/
5060 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5061 {
5062   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5063   PetscErrorCode ierr;
5064   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5065   IS             isrowa,iscola;
5066   Mat            *aloc;
5067   PetscBool      match;
5068 
5069   PetscFunctionBegin;
5070   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5071   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5072   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5073   if (!row) {
5074     start = A->rmap->rstart; end = A->rmap->rend;
5075     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5076   } else {
5077     isrowa = *row;
5078   }
5079   if (!col) {
5080     start = A->cmap->rstart;
5081     cmap  = a->garray;
5082     nzA   = a->A->cmap->n;
5083     nzB   = a->B->cmap->n;
5084     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5085     ncols = 0;
5086     for (i=0; i<nzB; i++) {
5087       if (cmap[i] < start) idx[ncols++] = cmap[i];
5088       else break;
5089     }
5090     imark = i;
5091     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5092     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5093     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5094   } else {
5095     iscola = *col;
5096   }
5097   if (scall != MAT_INITIAL_MATRIX) {
5098     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5099     aloc[0] = *A_loc;
5100   }
5101   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5102   *A_loc = aloc[0];
5103   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5104   if (!row) {
5105     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5106   }
5107   if (!col) {
5108     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5109   }
5110   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5111   PetscFunctionReturn(0);
5112 }
5113 
5114 #undef __FUNCT__
5115 #define __FUNCT__ "MatGetBrowsOfAcols"
5116 /*@C
5117     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5118 
5119     Collective on Mat
5120 
5121    Input Parameters:
5122 +    A,B - the matrices in mpiaij format
5123 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5124 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5125 
5126    Output Parameter:
5127 +    rowb, colb - index sets of rows and columns of B to extract
5128 -    B_seq - the sequential matrix generated
5129 
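   A hedged sketch; the index sets created for MAT_INITIAL_MATRIX are returned
   so they can be passed back in for a later MAT_REUSE_MATRIX call:

.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
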
5130     Level: developer
5131 
5132 @*/
5133 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5134 {
5135   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5136   PetscErrorCode ierr;
5137   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5138   IS             isrowb,iscolb;
5139   Mat            *bseq=NULL;
5140 
5141   PetscFunctionBegin;
5142   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5143     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D,%D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5144   }
5145   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5146 
5147   if (scall == MAT_INITIAL_MATRIX) {
5148     start = A->cmap->rstart;
5149     cmap  = a->garray;
5150     nzA   = a->A->cmap->n;
5151     nzB   = a->B->cmap->n;
5152     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5153     ncols = 0;
5154     for (i=0; i<nzB; i++) {  /* B rows below the local row range */
5155       if (cmap[i] < start) idx[ncols++] = cmap[i];
5156       else break;
5157     }
5158     imark = i;
5159     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5160     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* B rows above the local row range */
5161     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5162     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5163   } else {
5164     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5165     isrowb  = *rowb; iscolb = *colb;
5166     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5167     bseq[0] = *B_seq;
5168   }
5169   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5170   *B_seq = bseq[0];
5171   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5172   if (!rowb) {
5173     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5174   } else {
5175     *rowb = isrowb;
5176   }
5177   if (!colb) {
5178     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5179   } else {
5180     *colb = iscolb;
5181   }
5182   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
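/*
   Example usage (a minimal sketch, assuming A and B are assembled MPIAIJ
   matrices whose layouts satisfy the compatibility check above).  The first
   call returns the index sets it built; they must be passed back on reuse
   and eventually destroyed by the caller:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... when only B's values change, replay with the saved index sets ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/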
5185 
5186 #undef __FUNCT__
5187 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5188 /*
5189     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5190     of the OFF-DIAGONAL portion of local A
5191 
5192     Collective on Mat
5193 
5194    Input Parameters:
5195 +    A,B - the matrices in mpiaij format
5196 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5197 
5198    Output Parameters:
5199 +    startsj_s - starting points in B's sending j-arrays, saved for MAT_REUSE_MATRIX calls (or NULL)
5200 .    startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE_MATRIX calls (or NULL)
5201 .    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX calls (or NULL)
5202 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5203 
5204     Level: developer
5205 
5206 */
5207 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5208 {
5209   VecScatter_MPI_General *gen_to,*gen_from;
5210   PetscErrorCode         ierr;
5211   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5212   Mat_SeqAIJ             *b_oth;
5213   VecScatter             ctx =a->Mvctx;
5214   MPI_Comm               comm;
5215   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5216   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5217   PetscScalar            *rvalues,*svalues;
5218   MatScalar              *b_otha,*bufa,*bufA;
5219   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5220   MPI_Request            *rwaits = NULL,*swaits = NULL;
5221   MPI_Status             *sstatus,rstatus;
5222   PetscMPIInt            jj;
5223   PetscInt               *cols,sbs,rbs;
5224   PetscScalar            *vals;
5225 
5226   PetscFunctionBegin;
5227   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5228   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5229     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D,%D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5230   }
5231   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5232   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5233 
5234   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5235   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5236   rvalues  = gen_from->values; /* will hold the lengths of the rows to be received */
5237   svalues  = gen_to->values;   /* will hold the lengths of the rows to be sent */
5238   nrecvs   = gen_from->n;
5239   nsends   = gen_to->n;
5240 
5241   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5242   srow    = gen_to->indices;    /* local row index to be sent */
5243   sstarts = gen_to->starts;
5244   sprocs  = gen_to->procs;
5245   sstatus = gen_to->sstatus;
5246   sbs     = gen_to->bs;
5247   rstarts = gen_from->starts;
5248   rprocs  = gen_from->procs;
5249   rbs     = gen_from->bs;
5250 
5251   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5252   if (scall == MAT_INITIAL_MATRIX) {
5253     /* i-array */
5254     /*---------*/
5255     /*  post receives */
5256     for (i=0; i<nrecvs; i++) {
5257       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5258       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5259       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5260     }
5261 
5262     /* pack the outgoing message */
5263     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5264 
5265     sstartsj[0] = 0;
5266     rstartsj[0] = 0;
5267     len         = 0; /* total length of j or a array to be sent */
5268     k           = 0;
5269     for (i=0; i<nsends; i++) {
5270       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5271       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5272       for (j=0; j<nrows; j++) {
5273         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5274         for (l=0; l<sbs; l++) {
5275           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5276 
5277           rowlen[j*sbs+l] = ncols;
5278 
5279           len += ncols;
5280           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5281         }
5282         k++;
5283       }
5284       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5285 
5286       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5287     }
5288     /* recvs and sends of i-array are completed */
5289     i = nrecvs;
5290     while (i--) {
5291       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5292     }
5293     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5294 
5295     /* allocate buffers for sending j and a arrays */
5296     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5297     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5298 
5299     /* create i-array of B_oth */
5300     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5301 
5302     b_othi[0] = 0;
5303     len       = 0; /* total length of j or a array to be received */
5304     k         = 0;
5305     for (i=0; i<nrecvs; i++) {
5306       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5307       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5308       for (j=0; j<nrows; j++) {
5309         b_othi[k+1] = b_othi[k] + rowlen[j];
5310         len        += rowlen[j]; k++;
5311       }
5312       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5313     }
5314 
5315     /* allocate space for j and a arrays of B_oth */
5316     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5317     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5318 
5319     /* j-array */
5320     /*---------*/
5321     /*  post receives of j-array */
5322     for (i=0; i<nrecvs; i++) {
5323       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5324       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5325     }
5326 
5327     /* pack the outgoing message j-array */
5328     k = 0;
5329     for (i=0; i<nsends; i++) {
5330       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5331       bufJ  = bufj+sstartsj[i];
5332       for (j=0; j<nrows; j++) {
5333         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5334         for (ll=0; ll<sbs; ll++) {
5335           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5336           for (l=0; l<ncols; l++) {
5337             *bufJ++ = cols[l];
5338           }
5339           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5340         }
5341       }
5342       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5343     }
5344 
5345     /* recvs and sends of j-array are completed */
5346     i = nrecvs;
5347     while (i--) {
5348       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5349     }
5350     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5351   } else if (scall == MAT_REUSE_MATRIX) {
5352     sstartsj = *startsj_s;
5353     rstartsj = *startsj_r;
5354     bufa     = *bufa_ptr;
5355     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5356     b_otha   = b_oth->a;
5357   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5358 
5359   /* a-array */
5360   /*---------*/
5361   /*  post receives of a-array */
5362   for (i=0; i<nrecvs; i++) {
5363     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5364     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5365   }
5366 
5367   /* pack the outgoing message a-array */
5368   k = 0;
5369   for (i=0; i<nsends; i++) {
5370     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5371     bufA  = bufa+sstartsj[i];
5372     for (j=0; j<nrows; j++) {
5373       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5374       for (ll=0; ll<sbs; ll++) {
5375         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5376         for (l=0; l<ncols; l++) {
5377           *bufA++ = vals[l];
5378         }
5379         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5380       }
5381     }
5382     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5383   }
5384   /* recvs and sends of a-array are completed */
5385   i = nrecvs;
5386   while (i--) {
5387     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5388   }
5389   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5390   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5391 
5392   if (scall == MAT_INITIAL_MATRIX) {
5393     /* put together the new matrix */
5394     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5395 
5396     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5397     /* Since these are PETSc arrays, change flags to free them as necessary. */
5398     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5399     b_oth->free_a  = PETSC_TRUE;
5400     b_oth->free_ij = PETSC_TRUE;
5401     b_oth->nonew   = 0;
5402 
5403     ierr = PetscFree(bufj);CHKERRQ(ierr);
5404     if (!startsj_s || !bufa_ptr) {
5405       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5406       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller, so free it here */
5407     } else {
5408       *startsj_s = sstartsj;
5409       *startsj_r = rstartsj;
5410       *bufa_ptr  = bufa;
5411     }
5412   }
5413   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5414   PetscFunctionReturn(0);
5415 }
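/*
   Example usage (a minimal sketch of the intended calling sequence for this
   developer-level routine, assuming A and B are assembled MPIAIJ matrices
   with compatible layouts).  The startsj_s/startsj_r/bufa buffers returned
   by the first call let a later call replay the communication pattern when
   only the numerical values of B have changed:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/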
5416 
5417 #undef __FUNCT__
5418 #define __FUNCT__ "MatGetCommunicationStructs"
5419 /*@C
5420   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5421 
5422   Not Collective
5423 
5424   Input Parameters:
5425 . A - The matrix in mpiaij format
5426 
5427   Output Parameters:
5428 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5429 . colmap - A map from global column index to local index into lvec
5430 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5431 
5432   Level: developer
5433 
5434 @*/
5435 #if defined(PETSC_USE_CTABLE)
5436 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5437 #else
5438 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5439 #endif
5440 {
5441   Mat_MPIAIJ *a;
5442 
5443   PetscFunctionBegin;
5444   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5445   PetscValidPointer(lvec, 2);
5446   PetscValidPointer(colmap, 3);
5447   PetscValidPointer(multScatter, 4);
5448   a = (Mat_MPIAIJ*) A->data;
5449   if (lvec) *lvec = a->lvec;
5450   if (colmap) *colmap = a->colmap;
5451   if (multScatter) *multScatter = a->Mvctx;
5452   PetscFunctionReturn(0);
5453 }
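/*
   Example usage (a minimal sketch; the returned objects are references owned
   by the matrix and must not be destroyed by the caller):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/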
5454 
5455 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5458 
5459 #undef __FUNCT__
5460 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5461 /*
5462     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5463 
5464                n                       p                          p
5465         (              )       (              )         (                  )
5466       m (      A       )  *  n (       B      )   =   m (         C        )
5467         (              )       (              )         (                  )
5468 
5469 */
5470 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5471 {
5472   PetscErrorCode ierr;
5473   Mat            At,Bt,Ct;
5474 
5475   PetscFunctionBegin;
5476   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5477   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5478   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5479   ierr = MatDestroy(&At);CHKERRQ(ierr);
5480   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5481   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5482   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5483   PetscFunctionReturn(0);
5484 }
5485 
5486 #undef __FUNCT__
5487 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5488 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5489 {
5490   PetscErrorCode ierr;
5491   PetscInt       m=A->rmap->n,n=B->cmap->n;
5492   Mat            Cmat;
5493 
5494   PetscFunctionBegin;
5495   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5496   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5497   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5498   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5499   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5500   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5501   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5502   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5503 
5504   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5505 
5506   *C = Cmat;
5507   PetscFunctionReturn(0);
5508 }
5509 
5510 /* ----------------------------------------------------------------*/
5511 #undef __FUNCT__
5512 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5513 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5514 {
5515   PetscErrorCode ierr;
5516 
5517   PetscFunctionBegin;
5518   if (scall == MAT_INITIAL_MATRIX) {
5519     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5520     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5521     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5522   }
5523   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5524   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5525   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5526   PetscFunctionReturn(0);
5527 }
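/*
   Example usage (a minimal sketch): this kernel is normally dispatched
   through the generic interface rather than called directly, e.g. with A of
   type MATMPIDENSE and B of type MATMPIAIJ:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/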
5528 
5529 #if defined(PETSC_HAVE_MUMPS)
5530 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5531 #endif
5532 #if defined(PETSC_HAVE_PASTIX)
5533 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5534 #endif
5535 #if defined(PETSC_HAVE_SUPERLU_DIST)
5536 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5537 #endif
5538 #if defined(PETSC_HAVE_CLIQUE)
5539 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5540 #endif
5541 
5542 /*MC
5543    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5544 
5545    Options Database Keys:
5546 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5547 
5548   Level: beginner
5549 
5550 .seealso: MatCreateAIJ()
5551 M*/
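/*
   Example usage (a minimal sketch of selecting this type at run time with
   -mat_type mpiaij; M and N stand for global dimensions chosen by the
   application, and the preallocation counts are placeholders):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/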
5552 
5553 #undef __FUNCT__
5554 #define __FUNCT__ "MatCreate_MPIAIJ"
5555 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5556 {
5557   Mat_MPIAIJ     *b;
5558   PetscErrorCode ierr;
5559   PetscMPIInt    size;
5560 
5561   PetscFunctionBegin;
5562   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5563 
5564   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5565   B->data       = (void*)b;
5566   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5567   B->assembled  = PETSC_FALSE;
5568   B->insertmode = NOT_SET_VALUES;
5569   b->size       = size;
5570 
5571   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5572 
5573   /* build cache for off array entries formed */
5574   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5575 
5576   b->donotstash  = PETSC_FALSE;
5577   b->colmap      = 0;
5578   b->garray      = 0;
5579   b->roworiented = PETSC_TRUE;
5580 
5581   /* stuff used for matrix vector multiply */
5582   b->lvec  = NULL;
5583   b->Mvctx = NULL;
5584 
5585   /* stuff for MatGetRow() */
5586   b->rowindices   = 0;
5587   b->rowvalues    = 0;
5588   b->getrowactive = PETSC_FALSE;
5589 
5590   /* flexible pointer used in CUSP/CUSPARSE classes */
5591   b->spptr = NULL;
5592 
5593 #if defined(PETSC_HAVE_MUMPS)
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5595 #endif
5596 #if defined(PETSC_HAVE_PASTIX)
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5598 #endif
5599 #if defined(PETSC_HAVE_SUPERLU_DIST)
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5601 #endif
5602 #if defined(PETSC_HAVE_CLIQUE)
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5604 #endif
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5619   PetscFunctionReturn(0);
5620 }
5621 
5622 #undef __FUNCT__
5623 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5624 /*@
5625      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5626          and "off-diagonal" part of the matrix in CSR format.
5627 
5628    Collective on MPI_Comm
5629 
5630    Input Parameters:
5631 +  comm - MPI communicator
5632 .  m - number of local rows (Cannot be PETSC_DECIDE)
5633 .  n - This value should be the same as the local size used in creating the
5634        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5635        calculated if N is given). For square matrices n is almost always m.
5636 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5637 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5638 .   i - row indices for "diagonal" portion of matrix
5639 .   j - column indices
5640 .   a - matrix values
5641 .   oi - row indices for "off-diagonal" portion of matrix
5642 .   oj - column indices
5643 -   oa - matrix values
5644 
5645    Output Parameter:
5646 .   mat - the matrix
5647 
5648    Level: advanced
5649 
5650    Notes:
5651        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5652        must free the arrays once the matrix has been destroyed and not before.
5653 
5654        The i and j indices are 0 based
5655 
5656        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5657 
5658        This sets local rows and cannot be used to set off-processor values.
5659 
5660        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5661        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5662        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5663        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5664        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5665        communication if it is known that only local entries will be set.
5666 
5667 .keywords: matrix, aij, compressed row, sparse, parallel
5668 
5669 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5670           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5671 @*/
5672 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5673 {
5674   PetscErrorCode ierr;
5675   Mat_MPIAIJ     *maij;
5676 
5677   PetscFunctionBegin;
5678   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5679   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5680   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5681   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5682   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5683   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5684   maij = (Mat_MPIAIJ*) (*mat)->data;
5685 
5686   (*mat)->preallocated = PETSC_TRUE;
5687 
5688   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5689   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5690 
5691   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5692   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5693 
5694   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698 
5699   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5702   PetscFunctionReturn(0);
5703 }
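/*
   Example usage (a minimal sketch; the CSR arrays i,j,a and oi,oj,oa are
   assumed to have been built by the application, with j holding local column
   indices of the diagonal block and oj holding global column indices of the
   off-diagonal block, both 0-based).  The arrays must remain valid until the
   matrix is destroyed:

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/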
5704 
5705 /*
5706     Special version for direct calls from Fortran
5707 */
5708 #include <petsc-private/fortranimpl.h>
5709 
5710 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5711 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5712 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5713 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5714 #endif
5715 
5716 /* Change these macros so can be used in void function */
5717 #undef CHKERRQ
5718 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5719 #undef SETERRQ2
5720 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5721 #undef SETERRQ3
5722 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5723 #undef SETERRQ
5724 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5725 
5726 #undef __FUNCT__
5727 #define __FUNCT__ "matsetvaluesmpiaij_"
5728 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5729 {
5730   Mat            mat  = *mmat;
5731   PetscInt       m    = *mm, n = *mn;
5732   InsertMode     addv = *maddv;
5733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5734   PetscScalar    value;
5735   PetscErrorCode ierr;
5736 
5737   MatCheckPreallocated(mat,1);
5738   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5739 
5740 #if defined(PETSC_USE_DEBUG)
5741   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5742 #endif
5743   {
5744     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5745     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5746     PetscBool roworiented = aij->roworiented;
5747 
5748     /* Some Variables required in the macro */
5749     Mat        A                 = aij->A;
5750     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5751     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5752     MatScalar  *aa               = a->a;
5753     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5754     Mat        B                 = aij->B;
5755     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5756     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5757     MatScalar  *ba               = b->a;
5758 
5759     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5760     PetscInt  nonew = a->nonew;
5761     MatScalar *ap1,*ap2;
5762 
5763     PetscFunctionBegin;
5764     for (i=0; i<m; i++) {
5765       if (im[i] < 0) continue;
5766 #if defined(PETSC_USE_DEBUG)
5767       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5768 #endif
5769       if (im[i] >= rstart && im[i] < rend) {
5770         row      = im[i] - rstart;
5771         lastcol1 = -1;
5772         rp1      = aj + ai[row];
5773         ap1      = aa + ai[row];
5774         rmax1    = aimax[row];
5775         nrow1    = ailen[row];
5776         low1     = 0;
5777         high1    = nrow1;
5778         lastcol2 = -1;
5779         rp2      = bj + bi[row];
5780         ap2      = ba + bi[row];
5781         rmax2    = bimax[row];
5782         nrow2    = bilen[row];
5783         low2     = 0;
5784         high2    = nrow2;
5785 
5786         for (j=0; j<n; j++) {
5787           if (roworiented) value = v[i*n+j];
5788           else value = v[i+j*m];
5789           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5790           if (in[j] >= cstart && in[j] < cend) {
5791             col = in[j] - cstart;
5792             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5793           } else if (in[j] < 0) continue;
5794 #if defined(PETSC_USE_DEBUG)
5795           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5796 #endif
5797           else {
5798             if (mat->was_assembled) {
5799               if (!aij->colmap) {
5800                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5801               }
5802 #if defined(PETSC_USE_CTABLE)
5803               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5804               col--;
5805 #else
5806               col = aij->colmap[in[j]] - 1;
5807 #endif
5808               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5809                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5810                 col  =  in[j];
5811                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5812                 B     = aij->B;
5813                 b     = (Mat_SeqAIJ*)B->data;
5814                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5815                 rp2   = bj + bi[row];
5816                 ap2   = ba + bi[row];
5817                 rmax2 = bimax[row];
5818                 nrow2 = bilen[row];
5819                 low2  = 0;
5820                 high2 = nrow2;
5821                 bm    = aij->B->rmap->n;
5822                 ba    = b->a;
5823               }
5824             } else col = in[j];
5825             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5826           }
5827         }
5828       } else if (!aij->donotstash) {
5829         if (roworiented) {
5830           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5831         } else {
5832           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5833         }
5834       }
5835     }
5836   }
5837   PetscFunctionReturnVoid();
5838 }
5839 
5840