xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 48c0d076c22679d91eb4eee26770f8774147511c)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to using inodes when
22    enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
/*
    MatDistribute_MPIAIJ - Distributes a SeqAIJ matrix held on process 0 across the processes
    of comm, producing (or refilling) a parallel AIJ matrix.

    Input Parameters:
+   comm  - communicator over which the matrix is distributed
.   gmat  - the sequential matrix; its type is checked and its data are read on process 0 only
.   m     - number of rows this process is to own (also used as its local column count)
-   reuse - MAT_INITIAL_MATRIX to create *inmat; any other value reuses the existing *inmat and
            only moves numerical values from process 0

    Output Parameter:
.   inmat - the distributed matrix

    Notes:
    Code stolen from MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.

    On the initial call the per-row count of entries lying to the left of the diagonal block is
    stored in the ld field of the new matrix; the reuse path relies on it to split each incoming
    row of values into off-diagonal / diagonal / off-diagonal pieces.

    Used by a preconditioner, hence PETSC_EXTERN
*/
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* the input matrix type is verified on process 0 only */
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* every process ends up with the block sizes broadcast from process 0 */
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every process' row count, then turn the counts into a prefix sum so that
       process p owns rows rowners[p] .. rowners[p+1]-1 */
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
      /* ld[i]: entries of local row i with column < rstart; olens[i]: entries outside [rstart,rend) */
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
      /* process 0 inserts its own rows straight from the arrays of gmat */
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
      /* the receives below mirror, in the same order, the sends issued by process 0 */
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
    /* dlens holds full row lengths up to here; subtract the off-diagonal part for preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
    /* restore the full row lengths; they are the per-row strides into gmataj/gmataa below */
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
    /* only the receive buffers of the non-root processes were allocated here */
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
    /* ld is handed to the matrix; the reuse path reads it back */
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced while copying below, so the allocation is remembered for PetscFree() */
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* The values of each row arrive as [left off-diagonal | diagonal | right off-diagonal].
       Row 0: its ld[0] left values go to B, then its diagonal values go to A.
       Row i>0: one copy moves the right part of row i-1 plus the left part of row i into B
       (they are adjacent in both source and destination), then the diagonal values go to A.
       After the loop the right part of the last row is copied into B. */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
    /* step back to the index of the last local row (only used when there is at least one row) */
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at local (row,col) of the diagonal
   block A.  This is a statement macro, not a function: it reads and updates locals of the
   enclosing function (rp1, ap1, nrow1, rmax1, low1, high1, lastcol1, t, _i, ii, N, nonew,
   ignorezeroentries, A, a, aa, ai, aj, aimax, ailen, am) and defines the label a_noinsert.

   row,col   - local row and local column in A
   value     - the value to store
   addv      - ADD_VALUES accumulates into an existing entry; any other mode overwrites it
   orow,ocol - global row/column, used only in the error message

   Steps:
   - The search window [low1,high1) is kept between expansions: a column not larger than the
     previous one (lastcol1) resets the lower bound to 0, a larger one resets the upper bound
     to nrow1.
   - The window is bisected until it spans at most 5 entries and is then scanned linearly.
   - An existing entry is updated in place.
   - Otherwise nothing is inserted when the value is an off-diagonal (row != col) zero and
     ignorezeroentries is set, or when nonew == 1; nonew == -1 raises PETSC_ERR_ARG_OUTOFRANGE.
   - Otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably it grows the storage when the
     row is full - confirm against its definition), the later entries of the row are shifted
     up by one, the new entry is stored, and a->nz and A->nonzerostate are incremented.
   - ailen[row] is refreshed from nrow1 on every path.
*/
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
/*
   MatSetValues_SeqAIJ_B_Private - Inserts or adds one value at (row,col) of the off-diagonal
   block B.  This is a statement macro, not a function: it reads and updates locals of the
   enclosing function (rp2, ap2, nrow2, rmax2, low2, high2, lastcol2, t, _i, ii, N, nonew,
   ignorezeroentries, B, b, ba, bi, bj, bimax, bilen, bm) and defines the label b_noinsert.

   row,col   - local row and the column index as stored in B
   value     - the value to store
   addv      - ADD_VALUES accumulates into an existing entry; any other mode overwrites it
   orow,ocol - global row/column, used only in the error message

   Steps:
   - The search window [low2,high2) is kept between expansions: a column not larger than the
     previous one (lastcol2) resets the lower bound to 0, a larger one resets the upper bound
     to nrow2.
   - The window is bisected until it spans at most 5 entries and is then scanned linearly.
   - An existing entry is updated in place.
   - Otherwise nothing is inserted when the value is zero and ignorezeroentries is set (unlike
     the diagonal-block macro there is no row != col exception), or when nonew == 1;
     nonew == -1 raises PETSC_ERR_ARG_OUTOFRANGE.
   - Otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably it grows the storage when the
     row is full - confirm against its definition), the later entries of the row are shifted
     up by one, the new entry is stored, and b->nz and B->nonzerostate are incremented.
   - bilen[row] is refreshed from nrow2 on every path.
*/
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m-by-n block of values into a parallel AIJ matrix.

   Input Parameters:
+  mat  - the MPIAIJ matrix
.  m,im - number of rows and their global indices; negative row indices are skipped
.  n,in - number of columns and their global indices; negative column indices are skipped
.  v    - the values; entry (i,j) is v[i*n+j] when the matrix is row oriented, v[i+j*m] otherwise
-  addv - ADD_VALUES or another insert mode, forwarded to the per-entry insertion macros

   Notes:
   Locally owned rows are handled entry by entry: columns inside the local column ownership
   range go to the diagonal block A through MatSetValues_SeqAIJ_A_Private(), all others go to
   the off-diagonal block B through MatSetValues_SeqAIJ_B_Private().  The many locals declared
   under "Some Variables required in the macro" are named as those macros expect and must not
   be renamed.

   Rows owned by another process raise an error when mat->nooffprocentries is set; otherwise
   they are placed in mat->stash (unless aij->donotstash is set) and the matrix is marked as
   not assembled.
*/
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the two insertion macros
         (suffix 1 = diagonal block A, suffix 2 = off-diagonal block B) */
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
          /* column inside the local column range: entry of the diagonal block */
573           col   = in[j] - cstart;
574           nonew = a->nonew;
          /* adding an off-diagonal zero changes nothing when zero entries are ignored */
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
          /* entry of the off-diagonal block */
582           if (mat->was_assembled) {
            /* translate the global column through colmap, which is created on first use and
               stores local index + 1; a result below zero after the decrement means the
               column is not present in B */
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
            /* column absent and B's nonew flag is zero: disassemble the matrix and continue
               with the global column index */
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
      /* row owned by another process: reject it, or stash it unless stashing is disabled */
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line, */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   /* do nondiagonal part */
1048   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1049   /* send it on its way */
1050   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   /* do local part */
1052   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1053   /* receive remote parts */
1054   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*
1059   This only works correctly for square matrices where the subblock A->A is the
1060    diagonal block
1061 */
1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1063 {
1064   PetscErrorCode ierr;
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066 
1067   PetscFunctionBegin;
1068   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1069   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1070   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1075 {
1076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1081   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1086 {
1087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091 #if defined(PETSC_USE_LOG)
1092   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1093 #endif
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1096   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1097   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1098 #if defined(PETSC_USE_CTABLE)
1099   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1100 #else
1101   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1102 #endif
1103   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1105   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1106   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1107   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1108   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1109 
1110   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1111   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1112   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1113   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1114   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1115   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1116   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1117   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1118 #if defined(PETSC_HAVE_ELEMENTAL)
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1120 #endif
1121 #if defined(PETSC_HAVE_HYPRE)
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1124 #endif
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1129 {
1130   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1131   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1132   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1133   PetscErrorCode ierr;
1134   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1135   int            fd;
1136   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1137   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1138   PetscScalar    *column_values;
1139   PetscInt       message_count,flowcontrolcount;
1140   FILE           *file;
1141 
1142   PetscFunctionBegin;
1143   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1144   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1145   nz   = A->nz + B->nz;
1146   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1147   if (!rank) {
1148     header[0] = MAT_FILE_CLASSID;
1149     header[1] = mat->rmap->N;
1150     header[2] = mat->cmap->N;
1151 
1152     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1153     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1154     /* get largest number of rows any processor has */
1155     rlen  = mat->rmap->n;
1156     range = mat->rmap->range;
1157     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1158   } else {
1159     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     rlen = mat->rmap->n;
1161   }
1162 
1163   /* load up the local row counts */
1164   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1165   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1166 
1167   /* store the row lengths to the file */
1168   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1169   if (!rank) {
1170     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     for (i=1; i<size; i++) {
1172       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1173       rlen = range[i+1] - range[i];
1174       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1178   } else {
1179     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1180     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1184 
1185   /* load up the local column indices */
1186   nzmax = nz; /* th processor needs space a largest processor needs */
1187   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1189   cnt   = 0;
1190   for (i=0; i<mat->rmap->n; i++) {
1191     for (j=B->i[i]; j<B->i[i+1]; j++) {
1192       if ((col = garray[B->j[j]]) > cstart) break;
1193       column_indices[cnt++] = col;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1196     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1197   }
1198   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1199 
1200   /* store the column indices to the file */
1201   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1202   if (!rank) {
1203     MPI_Status status;
1204     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     for (i=1; i<size; i++) {
1206       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1207       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1208       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1209       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1218   }
1219   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1220 
1221   /* load up the local column values */
1222   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1223   cnt  = 0;
1224   for (i=0; i<mat->rmap->n; i++) {
1225     for (j=B->i[i]; j<B->i[i+1]; j++) {
1226       if (garray[B->j[j]] > cstart) break;
1227       column_values[cnt++] = B->a[j];
1228     }
1229     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1230     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1231   }
1232   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1233 
1234   /* store the column values to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     MPI_Status status;
1238     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1242       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1243       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1245     }
1246     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1247   } else {
1248     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1249     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(column_values);CHKERRQ(ierr);
1254 
1255   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1256   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #include <petscdraw.h>
1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1264   PetscErrorCode    ierr;
1265   PetscMPIInt       rank = aij->rank,size = aij->size;
1266   PetscBool         isdraw,iascii,isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii) {
1275     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1276     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1277       MatInfo   info;
1278       PetscBool inodes;
1279 
1280       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1281       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1282       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1283       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1284       if (!inodes) {
1285         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1286                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1287       } else {
1288         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1289                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1290       }
1291       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1292       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1293       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1295       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1296       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1297       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1298       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1299       PetscFunctionReturn(0);
1300     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1301       PetscInt inodecount,inodelimit,*inodes;
1302       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1303       if (inodes) {
1304         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1305       } else {
1306         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1307       }
1308       PetscFunctionReturn(0);
1309     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1310       PetscFunctionReturn(0);
1311     }
1312   } else if (isbinary) {
1313     if (size == 1) {
1314       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1315       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1316     } else {
1317       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1318     }
1319     PetscFunctionReturn(0);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1324     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1325     if (isnull) PetscFunctionReturn(0);
1326   }
1327 
1328   {
1329     /* assemble the entire matrix onto first processor. */
1330     Mat        A;
1331     Mat_SeqAIJ *Aloc;
1332     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1333     MatScalar  *a;
1334 
1335     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1336     if (!rank) {
1337       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1340     }
1341     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1342     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1343     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1344     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1345     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1346 
1347     /* copy over the A part */
1348     Aloc = (Mat_SeqAIJ*)aij->A->data;
1349     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1350     row  = mat->rmap->rstart;
1351     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1352     for (i=0; i<m; i++) {
1353       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1354       row++;
1355       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1356     }
1357     aj = Aloc->j;
1358     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1359 
1360     /* copy over the B part */
1361     Aloc = (Mat_SeqAIJ*)aij->B->data;
1362     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1363     row  = mat->rmap->rstart;
1364     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1365     ct   = cols;
1366     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1367     for (i=0; i<m; i++) {
1368       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1369       row++;
1370       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1371     }
1372     ierr = PetscFree(ct);CHKERRQ(ierr);
1373     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     /*
1376        Everyone has to call to draw the matrix since the graphics waits are
1377        synchronized across all processors that share the PetscDraw object
1378     */
1379     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1380     if (!rank) {
1381       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1383     }
1384     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1385     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1386     ierr = MatDestroy(&A);CHKERRQ(ierr);
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1392 {
1393   PetscErrorCode ierr;
1394   PetscBool      iascii,isdraw,issocket,isbinary;
1395 
1396   PetscFunctionBegin;
1397   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1401   if (iascii || isdraw || isbinary || issocket) {
1402     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1408 {
1409   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1410   PetscErrorCode ierr;
1411   Vec            bb1 = 0;
1412   PetscBool      hasop;
1413 
1414   PetscFunctionBegin;
1415   if (flag == SOR_APPLY_UPPER) {
1416     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1417     PetscFunctionReturn(0);
1418   }
1419 
1420   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1421     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1422   }
1423 
1424   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1425     if (flag & SOR_ZERO_INITIAL_GUESS) {
1426       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1427       its--;
1428     }
1429 
1430     while (its--) {
1431       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1432       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1433 
1434       /* update rhs: bb1 = bb - B*x */
1435       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1436       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1437 
1438       /* local sweep */
1439       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1440     }
1441   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1444       its--;
1445     }
1446     while (its--) {
1447       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449 
1450       /* update rhs: bb1 = bb - B*x */
1451       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1452       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1453 
1454       /* local sweep */
1455       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1456     }
1457   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1458     if (flag & SOR_ZERO_INITIAL_GUESS) {
1459       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460       its--;
1461     }
1462     while (its--) {
1463       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465 
1466       /* update rhs: bb1 = bb - B*x */
1467       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1468       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1469 
1470       /* local sweep */
1471       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1472     }
1473   } else if (flag & SOR_EISENSTAT) {
1474     Vec xx1;
1475 
1476     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1477     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1478 
1479     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481     if (!mat->diag) {
1482       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1483       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1484     }
1485     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1486     if (hasop) {
1487       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1488     } else {
1489       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1490     }
1491     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1492 
1493     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1494 
1495     /* local sweep */
1496     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1497     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1498     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1499   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1500 
1501   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1502 
1503   matin->factorerrortype = mat->A->factorerrortype;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1508 {
1509   Mat            aA,aB,Aperm;
1510   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1511   PetscScalar    *aa,*ba;
1512   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1513   PetscSF        rowsf,sf;
1514   IS             parcolp = NULL;
1515   PetscBool      done;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1520   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1521   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1522   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1523 
1524   /* Invert row permutation to find out where my rows should go */
1525   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1526   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1527   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1528   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1529   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1531 
1532   /* Invert column permutation to find out where my columns should go */
1533   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1534   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1535   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1536   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1537   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1538   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1540 
1541   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1542   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1543   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1544 
1545   /* Find out where my gcols should go */
1546   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1547   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1548   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1549   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1550   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1551   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1552   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1553   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1554 
1555   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1556   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1557   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) {
1559     PetscInt row = rdest[i],rowner;
1560     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1561     for (j=ai[i]; j<ai[i+1]; j++) {
1562       PetscInt cowner,col = cdest[aj[j]];
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567     for (j=bi[i]; j<bi[i+1]; j++) {
1568       PetscInt cowner,col = gcdest[bj[j]];
1569       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1570       if (rowner == cowner) dnnz[i]++;
1571       else onnz[i]++;
1572     }
1573   }
1574   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1575   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1579 
1580   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1581   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1582   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1585     PetscInt j0,rowlen;
1586     rowlen = ai[i+1] - ai[i];
1587     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1588       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1589       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1590     }
1591     rowlen = bi[i+1] - bi[i];
1592     for (j0=j=0; j<rowlen; j0=j) {
1593       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1594       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1595     }
1596   }
1597   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1598   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1599   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1600   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1601   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1602   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1603   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1604   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1605   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1606   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1607   *B = Aperm;
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1612 {
1613   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1618   if (ghosts) *ghosts = aij->garray;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   Mat            A    = mat->A,B = mat->B;
1626   PetscErrorCode ierr;
1627   PetscReal      isend[5],irecv[5];
1628 
1629   PetscFunctionBegin;
1630   info->block_size = 1.0;
1631   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1632 
1633   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1634   isend[3] = info->memory;  isend[4] = info->mallocs;
1635 
1636   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1637 
1638   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1639   isend[3] += info->memory;  isend[4] += info->mallocs;
1640   if (flag == MAT_LOCAL) {
1641     info->nz_used      = isend[0];
1642     info->nz_allocated = isend[1];
1643     info->nz_unneeded  = isend[2];
1644     info->memory       = isend[3];
1645     info->mallocs      = isend[4];
1646   } else if (flag == MAT_GLOBAL_MAX) {
1647     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1648 
1649     info->nz_used      = irecv[0];
1650     info->nz_allocated = irecv[1];
1651     info->nz_unneeded  = irecv[2];
1652     info->memory       = irecv[3];
1653     info->mallocs      = irecv[4];
1654   } else if (flag == MAT_GLOBAL_SUM) {
1655     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1656 
1657     info->nz_used      = irecv[0];
1658     info->nz_allocated = irecv[1];
1659     info->nz_unneeded  = irecv[2];
1660     info->memory       = irecv[3];
1661     info->mallocs      = irecv[4];
1662   }
1663   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1664   info->fill_ratio_needed = 0;
1665   info->factor_mallocs    = 0;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1670 {
1671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   switch (op) {
1676   case MAT_NEW_NONZERO_LOCATIONS:
1677   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1678   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1679   case MAT_KEEP_NONZERO_PATTERN:
1680   case MAT_NEW_NONZERO_LOCATION_ERR:
1681   case MAT_USE_INODES:
1682   case MAT_IGNORE_ZERO_ENTRIES:
1683     MatCheckPreallocated(A,1);
1684     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1685     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_ROW_ORIENTED:
1688     MatCheckPreallocated(A,1);
1689     a->roworiented = flg;
1690 
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1693     break;
1694   case MAT_NEW_DIAGONALS:
1695     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1696     break;
1697   case MAT_IGNORE_OFF_PROC_ENTRIES:
1698     a->donotstash = flg;
1699     break;
1700   case MAT_SPD:
1701     A->spd_set = PETSC_TRUE;
1702     A->spd     = flg;
1703     if (flg) {
1704       A->symmetric                  = PETSC_TRUE;
1705       A->structurally_symmetric     = PETSC_TRUE;
1706       A->symmetric_set              = PETSC_TRUE;
1707       A->structurally_symmetric_set = PETSC_TRUE;
1708     }
1709     break;
1710   case MAT_SYMMETRIC:
1711     MatCheckPreallocated(A,1);
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     break;
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715     MatCheckPreallocated(A,1);
1716     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_HERMITIAN:
1719     MatCheckPreallocated(A,1);
1720     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_SYMMETRY_ETERNAL:
1723     MatCheckPreallocated(A,1);
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   case MAT_STRUCTURE_ONLY:
1730     /* The option is handled directly by MatSetOption() */
1731     break;
1732   default:
1733     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1739 {
1740   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1741   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1742   PetscErrorCode ierr;
1743   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1744   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1745   PetscInt       *cmap,*idx_p;
1746 
1747   PetscFunctionBegin;
1748   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1749   mat->getrowactive = PETSC_TRUE;
1750 
1751   if (!mat->rowvalues && (idx || v)) {
1752     /*
1753         allocate enough space to hold information from the longest row.
1754     */
1755     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1756     PetscInt   max = 1,tmp;
1757     for (i=0; i<matin->rmap->n; i++) {
1758       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1759       if (max < tmp) max = tmp;
1760     }
1761     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1762   }
1763 
1764   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1765   lrow = row - rstart;
1766 
1767   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1768   if (!v)   {pvA = 0; pvB = 0;}
1769   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1770   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1771   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1772   nztot = nzA + nzB;
1773 
1774   cmap = mat->garray;
1775   if (v  || idx) {
1776     if (nztot) {
1777       /* Sort by increasing column numbers, assuming A and B already sorted */
1778       PetscInt imark = -1;
1779       if (v) {
1780         *v = v_p = mat->rowvalues;
1781         for (i=0; i<nzB; i++) {
1782           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1783           else break;
1784         }
1785         imark = i;
1786         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1787         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1788       }
1789       if (idx) {
1790         *idx = idx_p = mat->rowindices;
1791         if (imark > -1) {
1792           for (i=0; i<imark; i++) {
1793             idx_p[i] = cmap[cworkB[i]];
1794           }
1795         } else {
1796           for (i=0; i<nzB; i++) {
1797             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1798             else break;
1799           }
1800           imark = i;
1801         }
1802         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1803         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1804       }
1805     } else {
1806       if (idx) *idx = 0;
1807       if (v)   *v   = 0;
1808     }
1809   }
1810   *nz  = nztot;
1811   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1812   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1817 {
1818   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1819 
1820   PetscFunctionBegin;
1821   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1822   aij->getrowactive = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1827 {
1828   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1829   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1830   PetscErrorCode ierr;
1831   PetscInt       i,j,cstart = mat->cmap->rstart;
1832   PetscReal      sum = 0.0;
1833   MatScalar      *v;
1834 
1835   PetscFunctionBegin;
1836   if (aij->size == 1) {
1837     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1838   } else {
1839     if (type == NORM_FROBENIUS) {
1840       v = amat->a;
1841       for (i=0; i<amat->nz; i++) {
1842         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1843       }
1844       v = bmat->a;
1845       for (i=0; i<bmat->nz; i++) {
1846         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1847       }
1848       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1849       *norm = PetscSqrtReal(*norm);
1850       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1851     } else if (type == NORM_1) { /* max column norm */
1852       PetscReal *tmp,*tmp2;
1853       PetscInt  *jj,*garray = aij->garray;
1854       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1855       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1856       *norm = 0.0;
1857       v     = amat->a; jj = amat->j;
1858       for (j=0; j<amat->nz; j++) {
1859         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1860       }
1861       v = bmat->a; jj = bmat->j;
1862       for (j=0; j<bmat->nz; j++) {
1863         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1864       }
1865       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1866       for (j=0; j<mat->cmap->N; j++) {
1867         if (tmp2[j] > *norm) *norm = tmp2[j];
1868       }
1869       ierr = PetscFree(tmp);CHKERRQ(ierr);
1870       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1871       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1872     } else if (type == NORM_INFINITY) { /* max row norm */
1873       PetscReal ntemp = 0.0;
1874       for (j=0; j<aij->A->rmap->n; j++) {
1875         v   = amat->a + amat->i[j];
1876         sum = 0.0;
1877         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1878           sum += PetscAbsScalar(*v); v++;
1879         }
1880         v = bmat->a + bmat->i[j];
1881         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1882           sum += PetscAbsScalar(*v); v++;
1883         }
1884         if (sum > ntemp) ntemp = sum;
1885       }
1886       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1887       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1888     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1894 {
1895   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1896   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1897   PetscErrorCode ierr;
1898   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1899   PetscInt       cstart = A->cmap->rstart,ncol;
1900   Mat            B;
1901   MatScalar      *array;
1902 
1903   PetscFunctionBegin;
1904   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1905 
1906   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1907   ai = Aloc->i; aj = Aloc->j;
1908   bi = Bloc->i; bj = Bloc->j;
1909   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1910     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1911     PetscSFNode          *oloc;
1912     PETSC_UNUSED PetscSF sf;
1913 
1914     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1915     /* compute d_nnz for preallocation */
1916     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1917     for (i=0; i<ai[ma]; i++) {
1918       d_nnz[aj[i]]++;
1919       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1920     }
1921     /* compute local off-diagonal contributions */
1922     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1923     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1924     /* map those to global */
1925     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1926     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1927     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1928     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1929     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1930     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1931     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1932 
1933     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1934     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1935     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1936     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1937     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1938     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1939   } else {
1940     B    = *matout;
1941     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1942     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1943   }
1944 
1945   /* copy over the A part */
1946   array = Aloc->a;
1947   row   = A->rmap->rstart;
1948   for (i=0; i<ma; i++) {
1949     ncol = ai[i+1]-ai[i];
1950     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1951     row++;
1952     array += ncol; aj += ncol;
1953   }
1954   aj = Aloc->j;
1955   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1956 
1957   /* copy over the B part */
1958   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1959   array = Bloc->a;
1960   row   = A->rmap->rstart;
1961   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1962   cols_tmp = cols;
1963   for (i=0; i<mb; i++) {
1964     ncol = bi[i+1]-bi[i];
1965     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1966     row++;
1967     array += ncol; cols_tmp += ncol;
1968   }
1969   ierr = PetscFree(cols);CHKERRQ(ierr);
1970 
1971   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1974     *matout = B;
1975   } else {
1976     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1982 {
1983   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1984   Mat            a    = aij->A,b = aij->B;
1985   PetscErrorCode ierr;
1986   PetscInt       s1,s2,s3;
1987 
1988   PetscFunctionBegin;
1989   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1990   if (rr) {
1991     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1992     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1993     /* Overlap communication with computation. */
1994     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1995   }
1996   if (ll) {
1997     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1998     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1999     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2000   }
2001   /* scale  the diagonal block */
2002   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2003 
2004   if (rr) {
2005     /* Do a scatter end and then right scale the off-diagonal block */
2006     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2007     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2013 {
2014   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2023 {
2024   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2025   Mat            a,b,c,d;
2026   PetscBool      flg;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   a = matA->A; b = matA->B;
2031   c = matB->A; d = matB->B;
2032 
2033   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2034   if (flg) {
2035     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2036   }
2037   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2042 {
2043   PetscErrorCode ierr;
2044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2045   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2046 
2047   PetscFunctionBegin;
2048   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2049   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2050     /* because of the column compression in the off-processor part of the matrix a->B,
2051        the number of columns in a->B and b->B may be different, hence we cannot call
2052        the MatCopy() directly on the two parts. If need be, we can provide a more
2053        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2054        then copying the submatrices */
2055     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2056   } else {
2057     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2058     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2059   }
2060   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2065 {
2066   PetscErrorCode ierr;
2067 
2068   PetscFunctionBegin;
2069   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 /*
2074    Computes the number of nonzeros per row needed for preallocation when X and Y
2075    have different nonzero structure.
2076 */
2077 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2078 {
2079   PetscInt       i,j,k,nzx,nzy;
2080 
2081   PetscFunctionBegin;
2082   /* Set the number of nonzeros in the new matrix */
2083   for (i=0; i<m; i++) {
2084     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2085     nzx = xi[i+1] - xi[i];
2086     nzy = yi[i+1] - yi[i];
2087     nnz[i] = 0;
2088     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2089       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2090       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2091       nnz[i]++;
2092     }
2093     for (; k<nzy; k++) nnz[i]++;
2094   }
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2099 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2100 {
2101   PetscErrorCode ierr;
2102   PetscInt       m = Y->rmap->N;
2103   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2104   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2105 
2106   PetscFunctionBegin;
2107   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2112 {
2113   PetscErrorCode ierr;
2114   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2115   PetscBLASInt   bnz,one=1;
2116   Mat_SeqAIJ     *x,*y;
2117 
2118   PetscFunctionBegin;
2119   if (str == SAME_NONZERO_PATTERN) {
2120     PetscScalar alpha = a;
2121     x    = (Mat_SeqAIJ*)xx->A->data;
2122     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2123     y    = (Mat_SeqAIJ*)yy->A->data;
2124     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2125     x    = (Mat_SeqAIJ*)xx->B->data;
2126     y    = (Mat_SeqAIJ*)yy->B->data;
2127     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2128     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2129     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2130   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2131     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2132   } else {
2133     Mat      B;
2134     PetscInt *nnz_d,*nnz_o;
2135     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2136     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2137     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2138     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2139     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2140     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2141     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2142     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2143     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2145     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2146     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2147     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2148     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2154 
2155 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2156 {
2157 #if defined(PETSC_USE_COMPLEX)
2158   PetscErrorCode ierr;
2159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2160 
2161   PetscFunctionBegin;
2162   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2163   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2164 #else
2165   PetscFunctionBegin;
2166 #endif
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2171 {
2172   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2177   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2182 {
2183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2188   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196   PetscInt       i,*idxb = 0;
2197   PetscScalar    *va,*vb;
2198   Vec            vtmp;
2199 
2200   PetscFunctionBegin;
2201   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2202   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2203   if (idx) {
2204     for (i=0; i<A->rmap->n; i++) {
2205       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2206     }
2207   }
2208 
2209   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2210   if (idx) {
2211     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2212   }
2213   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2214   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2215 
2216   for (i=0; i<A->rmap->n; i++) {
2217     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2218       va[i] = vb[i];
2219       if (idx) idx[i] = a->garray[idxb[i]];
2220     }
2221   }
2222 
2223   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2224   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2225   ierr = PetscFree(idxb);CHKERRQ(ierr);
2226   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2231 {
2232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2233   PetscErrorCode ierr;
2234   PetscInt       i,*idxb = 0;
2235   PetscScalar    *va,*vb;
2236   Vec            vtmp;
2237 
2238   PetscFunctionBegin;
2239   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2240   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2241   if (idx) {
2242     for (i=0; i<A->cmap->n; i++) {
2243       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2244     }
2245   }
2246 
2247   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2248   if (idx) {
2249     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2250   }
2251   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2252   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2253 
2254   for (i=0; i<A->rmap->n; i++) {
2255     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2256       va[i] = vb[i];
2257       if (idx) idx[i] = a->garray[idxb[i]];
2258     }
2259   }
2260 
2261   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2262   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2263   ierr = PetscFree(idxb);CHKERRQ(ierr);
2264   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2269 {
2270   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2271   PetscInt       n      = A->rmap->n;
2272   PetscInt       cstart = A->cmap->rstart;
2273   PetscInt       *cmap  = mat->garray;
2274   PetscInt       *diagIdx, *offdiagIdx;
2275   Vec            diagV, offdiagV;
2276   PetscScalar    *a, *diagA, *offdiagA;
2277   PetscInt       r;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2282   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2283   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2284   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2285   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2286   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2287   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2288   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2289   for (r = 0; r < n; ++r) {
2290     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2291       a[r]   = diagA[r];
2292       idx[r] = cstart + diagIdx[r];
2293     } else {
2294       a[r]   = offdiagA[r];
2295       idx[r] = cmap[offdiagIdx[r]];
2296     }
2297   }
2298   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2299   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2300   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2301   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2302   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2303   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2308 {
2309   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2310   PetscInt       n      = A->rmap->n;
2311   PetscInt       cstart = A->cmap->rstart;
2312   PetscInt       *cmap  = mat->garray;
2313   PetscInt       *diagIdx, *offdiagIdx;
2314   Vec            diagV, offdiagV;
2315   PetscScalar    *a, *diagA, *offdiagA;
2316   PetscInt       r;
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2321   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2322   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2323   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2324   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2326   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2327   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2328   for (r = 0; r < n; ++r) {
2329     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2330       a[r]   = diagA[r];
2331       idx[r] = cstart + diagIdx[r];
2332     } else {
2333       a[r]   = offdiagA[r];
2334       idx[r] = cmap[offdiagIdx[r]];
2335     }
2336   }
2337   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2339   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2340   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2341   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2342   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2347 {
2348   PetscErrorCode ierr;
2349   Mat            *dummy;
2350 
2351   PetscFunctionBegin;
2352   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2353   *newmat = *dummy;
2354   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2359 {
2360   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2361   PetscErrorCode ierr;
2362 
2363   PetscFunctionBegin;
2364   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2365   A->factorerrortype = a->A->factorerrortype;
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2370 {
2371   PetscErrorCode ierr;
2372   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2376   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2377   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2378   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2383 {
2384   PetscFunctionBegin;
2385   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2386   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 /*@
2391    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2392 
2393    Collective on Mat
2394 
2395    Input Parameters:
2396 +    A - the matrix
2397 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2398 
2399  Level: advanced
2400 
2401 @*/
2402 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2403 {
2404   PetscErrorCode       ierr;
2405 
2406   PetscFunctionBegin;
2407   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2412 {
2413   PetscErrorCode       ierr;
2414   PetscBool            sc = PETSC_FALSE,flg;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2418   ierr = PetscObjectOptionsBegin((PetscObject)A);
2419     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2420     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2421     if (flg) {
2422       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2423     }
2424   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2429 {
2430   PetscErrorCode ierr;
2431   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2432   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2433 
2434   PetscFunctionBegin;
2435   if (!Y->preallocated) {
2436     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2437   } else if (!aij->nz) {
2438     PetscInt nonew = aij->nonew;
2439     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2440     aij->nonew = nonew;
2441   }
2442   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2447 {
2448   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2449   PetscErrorCode ierr;
2450 
2451   PetscFunctionBegin;
2452   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2453   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2454   if (d) {
2455     PetscInt rstart;
2456     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2457     *d += rstart;
2458 
2459   }
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 
2464 /* -------------------------------------------------------------------*/
2465 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2466                                        MatGetRow_MPIAIJ,
2467                                        MatRestoreRow_MPIAIJ,
2468                                        MatMult_MPIAIJ,
2469                                 /* 4*/ MatMultAdd_MPIAIJ,
2470                                        MatMultTranspose_MPIAIJ,
2471                                        MatMultTransposeAdd_MPIAIJ,
2472                                        0,
2473                                        0,
2474                                        0,
2475                                 /*10*/ 0,
2476                                        0,
2477                                        0,
2478                                        MatSOR_MPIAIJ,
2479                                        MatTranspose_MPIAIJ,
2480                                 /*15*/ MatGetInfo_MPIAIJ,
2481                                        MatEqual_MPIAIJ,
2482                                        MatGetDiagonal_MPIAIJ,
2483                                        MatDiagonalScale_MPIAIJ,
2484                                        MatNorm_MPIAIJ,
2485                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2486                                        MatAssemblyEnd_MPIAIJ,
2487                                        MatSetOption_MPIAIJ,
2488                                        MatZeroEntries_MPIAIJ,
2489                                 /*24*/ MatZeroRows_MPIAIJ,
2490                                        0,
2491                                        0,
2492                                        0,
2493                                        0,
2494                                 /*29*/ MatSetUp_MPIAIJ,
2495                                        0,
2496                                        0,
2497                                        MatGetDiagonalBlock_MPIAIJ,
2498                                        0,
2499                                 /*34*/ MatDuplicate_MPIAIJ,
2500                                        0,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                 /*39*/ MatAXPY_MPIAIJ,
2505                                        MatCreateSubMatrices_MPIAIJ,
2506                                        MatIncreaseOverlap_MPIAIJ,
2507                                        MatGetValues_MPIAIJ,
2508                                        MatCopy_MPIAIJ,
2509                                 /*44*/ MatGetRowMax_MPIAIJ,
2510                                        MatScale_MPIAIJ,
2511                                        MatShift_MPIAIJ,
2512                                        MatDiagonalSet_MPIAIJ,
2513                                        MatZeroRowsColumns_MPIAIJ,
2514                                 /*49*/ MatSetRandom_MPIAIJ,
2515                                        0,
2516                                        0,
2517                                        0,
2518                                        0,
2519                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2520                                        0,
2521                                        MatSetUnfactored_MPIAIJ,
2522                                        MatPermute_MPIAIJ,
2523                                        0,
2524                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2525                                        MatDestroy_MPIAIJ,
2526                                        MatView_MPIAIJ,
2527                                        0,
2528                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2529                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2530                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2535                                        MatGetRowMinAbs_MPIAIJ,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                 /*75*/ MatFDColoringApply_AIJ,
2541                                        MatSetFromOptions_MPIAIJ,
2542                                        0,
2543                                        0,
2544                                        MatFindZeroDiagonals_MPIAIJ,
2545                                 /*80*/ 0,
2546                                        0,
2547                                        0,
2548                                 /*83*/ MatLoad_MPIAIJ,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2555                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2556                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2557                                        MatPtAP_MPIAIJ_MPIAIJ,
2558                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2559                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2560                                        0,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                 /*99*/ 0,
2565                                        0,
2566                                        0,
2567                                        MatConjugate_MPIAIJ,
2568                                        0,
2569                                 /*104*/MatSetValuesRow_MPIAIJ,
2570                                        MatRealPart_MPIAIJ,
2571                                        MatImaginaryPart_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                 /*109*/0,
2575                                        0,
2576                                        MatGetRowMin_MPIAIJ,
2577                                        0,
2578                                        MatMissingDiagonal_MPIAIJ,
2579                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2580                                        0,
2581                                        MatGetGhosts_MPIAIJ,
2582                                        0,
2583                                        0,
2584                                 /*119*/0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        MatGetMultiProcBlock_MPIAIJ,
2589                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2590                                        MatGetColumnNorms_MPIAIJ,
2591                                        MatInvertBlockDiagonal_MPIAIJ,
2592                                        0,
2593                                        MatCreateSubMatricesMPI_MPIAIJ,
2594                                 /*129*/0,
2595                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2596                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2597                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2598                                        0,
2599                                 /*134*/0,
2600                                        0,
2601                                        MatRARt_MPIAIJ_MPIAIJ,
2602                                        0,
2603                                        0,
2604                                 /*139*/MatSetBlockSizes_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                        MatFDColoringSetUp_MPIXAIJ,
2608                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2609                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2610 };
2611 
2612 /* ----------------------------------------------------------------------------------------*/
2613 
2614 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2615 {
2616   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2617   PetscErrorCode ierr;
2618 
2619   PetscFunctionBegin;
2620   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2621   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2626 {
2627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2628   PetscErrorCode ierr;
2629 
2630   PetscFunctionBegin;
2631   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2632   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2637 {
2638   Mat_MPIAIJ     *b;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2643   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2644   b = (Mat_MPIAIJ*)B->data;
2645 
2646 #if defined(PETSC_USE_CTABLE)
2647   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2648 #else
2649   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2650 #endif
2651   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2652   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2653   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2654 
2655   /* Because the B will have been resized we simply destroy it and create a new one each time */
2656   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2657   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2658   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2659   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2660   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2661   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2662 
2663   if (!B->preallocated) {
2664     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2665     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2666     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2667     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2668     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2669   }
2670 
2671   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2672   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2673   B->preallocated  = PETSC_TRUE;
2674   B->was_assembled = PETSC_FALSE;
2675   B->assembled     = PETSC_FALSE;;
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2680 {
2681   Mat            mat;
2682   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2683   PetscErrorCode ierr;
2684 
2685   PetscFunctionBegin;
2686   *newmat = 0;
2687   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2688   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2689   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2690   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2691   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2692   a       = (Mat_MPIAIJ*)mat->data;
2693 
2694   mat->factortype   = matin->factortype;
2695   mat->assembled    = PETSC_TRUE;
2696   mat->insertmode   = NOT_SET_VALUES;
2697   mat->preallocated = PETSC_TRUE;
2698 
2699   a->size         = oldmat->size;
2700   a->rank         = oldmat->rank;
2701   a->donotstash   = oldmat->donotstash;
2702   a->roworiented  = oldmat->roworiented;
2703   a->rowindices   = 0;
2704   a->rowvalues    = 0;
2705   a->getrowactive = PETSC_FALSE;
2706 
2707   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2708   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2709 
2710   if (oldmat->colmap) {
2711 #if defined(PETSC_USE_CTABLE)
2712     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2713 #else
2714     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2715     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2716     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2717 #endif
2718   } else a->colmap = 0;
2719   if (oldmat->garray) {
2720     PetscInt len;
2721     len  = oldmat->B->cmap->n;
2722     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2723     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2724     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2725   } else a->garray = 0;
2726 
2727   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2728   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2729   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2730   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2731   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2732   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2733   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2734   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2735   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2736   *newmat = mat;
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2741 {
2742   PetscScalar    *vals,*svals;
2743   MPI_Comm       comm;
2744   PetscErrorCode ierr;
2745   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2746   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2747   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2748   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2749   PetscInt       cend,cstart,n,*rowners;
2750   int            fd;
2751   PetscInt       bs = newMat->rmap->bs;
2752 
2753   PetscFunctionBegin;
2754   /* force binary viewer to load .info file if it has not yet done so */
2755   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2756   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2757   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2758   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2759   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2760   if (!rank) {
2761     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2762     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2763     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2764   }
2765 
2766   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2767   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2768   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2769   if (bs < 0) bs = 1;
2770 
2771   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2772   M    = header[1]; N = header[2];
2773 
2774   /* If global sizes are set, check if they are consistent with that given in the file */
2775   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2776   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2777 
2778   /* determine ownership of all (block) rows */
2779   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2780   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2781   else m = newMat->rmap->n; /* Set by user */
2782 
2783   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2784   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2785 
2786   /* First process needs enough room for process with most rows */
2787   if (!rank) {
2788     mmax = rowners[1];
2789     for (i=2; i<=size; i++) {
2790       mmax = PetscMax(mmax, rowners[i]);
2791     }
2792   } else mmax = -1;             /* unused, but compilers complain */
2793 
2794   rowners[0] = 0;
2795   for (i=2; i<=size; i++) {
2796     rowners[i] += rowners[i-1];
2797   }
2798   rstart = rowners[rank];
2799   rend   = rowners[rank+1];
2800 
2801   /* distribute row lengths to all processors */
2802   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2803   if (!rank) {
2804     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2805     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2806     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2807     for (j=0; j<m; j++) {
2808       procsnz[0] += ourlens[j];
2809     }
2810     for (i=1; i<size; i++) {
2811       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2812       /* calculate the number of nonzeros on each processor */
2813       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2814         procsnz[i] += rowlengths[j];
2815       }
2816       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2817     }
2818     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2819   } else {
2820     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2821   }
2822 
2823   if (!rank) {
2824     /* determine max buffer needed and allocate it */
2825     maxnz = 0;
2826     for (i=0; i<size; i++) {
2827       maxnz = PetscMax(maxnz,procsnz[i]);
2828     }
2829     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2830 
2831     /* read in my part of the matrix column indices  */
2832     nz   = procsnz[0];
2833     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2834     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2835 
2836     /* read in every one elses and ship off */
2837     for (i=1; i<size; i++) {
2838       nz   = procsnz[i];
2839       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2840       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2841     }
2842     ierr = PetscFree(cols);CHKERRQ(ierr);
2843   } else {
2844     /* determine buffer space needed for message */
2845     nz = 0;
2846     for (i=0; i<m; i++) {
2847       nz += ourlens[i];
2848     }
2849     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2850 
2851     /* receive message of column indices*/
2852     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2853   }
2854 
2855   /* determine column ownership if matrix is not square */
2856   if (N != M) {
2857     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2858     else n = newMat->cmap->n;
2859     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2860     cstart = cend - n;
2861   } else {
2862     cstart = rstart;
2863     cend   = rend;
2864     n      = cend - cstart;
2865   }
2866 
2867   /* loop over local rows, determining number of off diagonal entries */
2868   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2869   jj   = 0;
2870   for (i=0; i<m; i++) {
2871     for (j=0; j<ourlens[i]; j++) {
2872       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2873       jj++;
2874     }
2875   }
2876 
2877   for (i=0; i<m; i++) {
2878     ourlens[i] -= offlens[i];
2879   }
2880   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2881 
2882   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2883 
2884   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2885 
2886   for (i=0; i<m; i++) {
2887     ourlens[i] += offlens[i];
2888   }
2889 
2890   if (!rank) {
2891     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2892 
2893     /* read in my part of the matrix numerical values  */
2894     nz   = procsnz[0];
2895     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2896 
2897     /* insert into matrix */
2898     jj      = rstart;
2899     smycols = mycols;
2900     svals   = vals;
2901     for (i=0; i<m; i++) {
2902       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2903       smycols += ourlens[i];
2904       svals   += ourlens[i];
2905       jj++;
2906     }
2907 
2908     /* read in other processors and ship out */
2909     for (i=1; i<size; i++) {
2910       nz   = procsnz[i];
2911       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2912       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2913     }
2914     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2915   } else {
2916     /* receive numeric values */
2917     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2918 
2919     /* receive message of values*/
2920     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2921 
2922     /* insert into matrix */
2923     jj      = rstart;
2924     smycols = mycols;
2925     svals   = vals;
2926     for (i=0; i<m; i++) {
2927       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2928       smycols += ourlens[i];
2929       svals   += ourlens[i];
2930       jj++;
2931     }
2932   }
2933   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2934   ierr = PetscFree(vals);CHKERRQ(ierr);
2935   ierr = PetscFree(mycols);CHKERRQ(ierr);
2936   ierr = PetscFree(rowners);CHKERRQ(ierr);
2937   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2938   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 /* Not scalable because of ISAllGather() unless getting all columns. */
2943 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2944 {
2945   PetscErrorCode ierr;
2946   IS             iscol_local;
2947   PetscBool      isstride;
2948   PetscMPIInt    lisstride=0,gisstride;
2949 
2950   PetscFunctionBegin;
2951   /* check if we are grabbing all columns*/
2952   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2953 
2954   if (isstride) {
2955     PetscInt  start,len,mstart,mlen;
2956     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2957     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2958     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2959     if (mstart == start && mlen-mstart == len) lisstride = 1;
2960   }
2961 
2962   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2963   if (gisstride) {
2964     PetscInt N;
2965     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2966     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2967     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2968     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2969   } else {
2970     PetscInt cbs;
2971     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2972     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2973     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2974   }
2975 
2976   *isseq = iscol_local;
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 /*
2981  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
2982  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2983 
2984  Input Parameters:
2985    mat - matrix
2986    isrow - parallel row index set; its local indices are a subset of local columns of mat,
2987            i.e., mat->rstart <= isrow[i] < mat->rend
2988    iscol - parallel column index set; its local indices are a subset of local columns of mat,
2989            i.e., mat->cstart <= iscol[i] < mat->cend
2990  Output Parameter:
2991    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
2992    iscol_o - sequential column index set for retrieving mat->B
2993    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
2994  */
2995 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
2996 {
2997   PetscErrorCode ierr;
2998   Vec            x,cmap;
2999   const PetscInt *is_idx;
3000   PetscScalar    *xarray,*cmaparray;
3001   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3002   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3003   Mat            B=a->B;
3004   Vec            lvec=a->lvec,lcmap;
3005   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3006   MPI_Comm       comm;
3007 
3008   PetscFunctionBegin;
3009   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3010   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3011 
3012   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3013   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3014   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3015   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3016 
3017   /* Get start indices */
3018   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3019   isstart -= ncols;
3020   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3021 
3022   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3023   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3024   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3025   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3026   for (i=0; i<ncols; i++) {
3027     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3028     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3029     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3030   }
3031   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3032   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3033   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3034 
3035   /* Get iscol_d */
3036   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3037   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3038   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3039 
3040   /* Get isrow_d */
3041   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3042   rstart = mat->rmap->rstart;
3043   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3044   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3045   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3046   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3047 
3048   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3049   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3050   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3051 
3052   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3053   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3054 
3055   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3056 
3057   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3058   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3059   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3060 
3061   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3062   /* off-process column indices */
3063   count = 0;
3064   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3065   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3066 
3067   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3068   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3069   for (i=0; i<Bn; i++) {
3070     if (PetscRealPart(xarray[i]) > -1.0) {
3071       idx[count]     = i;                   /* local column index in off-diagonal part B */
3072       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3073     }
3074   }
3075   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3076   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3077 
3078   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3079   /* cannot ensure iscol_o has same blocksize as iscol! */
3080 
3081   ierr = PetscFree(idx);CHKERRQ(ierr);
3082 
3083   *garray = cmap1;
3084 
3085   ierr = VecDestroy(&x);CHKERRQ(ierr);
3086   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3087   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3092 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3093 {
3094   PetscErrorCode ierr;
3095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3096   Mat            M = NULL;
3097   MPI_Comm       comm;
3098   IS             iscol_d,isrow_d,iscol_o;
3099   Mat            Asub = NULL,Bsub = NULL;
3100   PetscInt       n;
3101 
3102   PetscFunctionBegin;
3103   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3104 
3105   if (call == MAT_REUSE_MATRIX) {
3106     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3107     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3108     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3109 
3110     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3111     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3112 
3113     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3114     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3115 
3116     /* Update diagonal and off-diagonal portions of submat */
3117     asub = (Mat_MPIAIJ*)(*submat)->data;
3118     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3119     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3120     if (n) {
3121       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3122     }
3123     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3125 
3126   } else { /* call == MAT_INITIAL_MATRIX) */
3127     const PetscInt *garray;
3128     PetscInt        BsubN;
3129 
3130     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3131     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3132 
3133     /* Create local submatrices Asub and Bsub */
3134     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3135     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3136 
3137     /* Create submatrix M */
3138     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3139 
3140     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3141     asub = (Mat_MPIAIJ*)M->data;
3142 
3143     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3144     n = asub->B->cmap->N;
3145     if (BsubN > n) {
3146       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3147       const PetscInt *idx;
3148       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3149       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3150 
3151       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3152       j = 0;
3153       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3154       for (i=0; i<n; i++) {
3155         if (j >= BsubN) break;
3156         while (subgarray[i] > garray[j]) j++;
3157 
3158         if (subgarray[i] == garray[j]) {
3159           idx_new[i] = idx[j++];
3160         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3161       }
3162       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3163 
3164       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3165       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3166 
3167     } else if (BsubN < n) {
3168       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3169     }
3170 
3171     ierr = PetscFree(garray);CHKERRQ(ierr);
3172     *submat = M;
3173 
3174     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3175     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3176     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3177 
3178     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3179     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3180 
3181     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3182     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3183   }
3184   PetscFunctionReturn(0);
3185 }
3186 
3187 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3188 {
3189   PetscErrorCode ierr;
3190   IS             iscol_local=NULL,isrow_d;
3191   PetscInt       csize;
3192   PetscInt       n,i,j,start,end;
3193   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3194   MPI_Comm       comm;
3195 
3196   PetscFunctionBegin;
3197   /* If isrow has same processor distribution as mat,
3198      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3199   if (call == MAT_REUSE_MATRIX) {
3200     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3201     if (isrow_d) {
3202       sameRowDist  = PETSC_TRUE;
3203       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3204     } else {
3205       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3206       if (iscol_local) {
3207         sameRowDist  = PETSC_TRUE;
3208         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3209       }
3210     }
3211   } else {
3212     /* Check if isrow has same processor distribution as mat */
3213     sameDist[0] = PETSC_FALSE;
3214     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3215     if (!n) {
3216       sameDist[0] = PETSC_TRUE;
3217     } else {
3218       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3219       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3220       if (i >= start && j < end) {
3221         sameDist[0] = PETSC_TRUE;
3222       }
3223     }
3224 
3225     /* Check if iscol has same processor distribution as mat */
3226     sameDist[1] = PETSC_FALSE;
3227     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3228     if (!n) {
3229       sameDist[1] = PETSC_TRUE;
3230     } else {
3231       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3232       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3233       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3234     }
3235 
3236     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3237     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3238     sameRowDist = tsameDist[0];
3239   }
3240 
3241   if (sameRowDist) {
3242     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3243       /* isrow and iscol have same processor distribution as mat */
3244       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3245       PetscFunctionReturn(0);
3246     } else { /* sameRowDist */
3247       /* isrow has same processor distribution as mat */
3248       if (call == MAT_INITIAL_MATRIX) {
3249         PetscBool sorted;
3250         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3251         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3252         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3253         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3254 
3255         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3256         if (sorted) {
3257           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3258           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3259           PetscFunctionReturn(0);
3260         }
3261       } else { /* call == MAT_REUSE_MATRIX */
3262         IS    iscol_sub;
3263         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"not done yet"); //querry iscol_sub!
3264         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3265         if (iscol_sub) {
3266           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3267           PetscFunctionReturn(0);
3268         }
3269       }
3270     }
3271   }
3272 
3273   /* General case: iscol -> iscol_local which has global size of iscol */
3274   if (call == MAT_REUSE_MATRIX) {
3275     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3276     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3277   } else {
3278     if (!iscol_local) {
3279       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3280     }
3281   }
3282 
3283   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3284   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3285 
3286   if (call == MAT_INITIAL_MATRIX) {
3287     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3288     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3289   }
3290   PetscFunctionReturn(0);
3291 }
3292 
3293 /*@C
3294      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3295          and "off-diagonal" part of the matrix in CSR format.
3296 
3297    Collective on MPI_Comm
3298 
3299    Input Parameters:
3300 +  comm - MPI communicator
3301 .  A - "diagonal" portion of matrix
3302 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3303 -  garray - global index of B columns
3304 
3305    Output Parameter:
3306 .   mat - the matrix, with input A as its local diagonal matrix
3307    Level: advanced
3308 
3309    Notes:
3310        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3311        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3312 
3313 .seealso: MatCreateMPIAIJWithSplitArrays()
3314 @*/
3315 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3316 {
3317   PetscErrorCode ierr;
3318   Mat_MPIAIJ     *maij;
3319   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3320   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3321   PetscScalar    *oa=b->a;
3322   Mat            Bnew;
3323   PetscInt       m,n,N;
3324 
3325   PetscFunctionBegin;
3326   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3327   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3328   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3329   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3330   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3331   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3332 
3333   /* Get global columns of mat */
3334   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3335 
3336   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3337   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3338   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3339   maij = (Mat_MPIAIJ*)(*mat)->data;
3340 
3341   (*mat)->preallocated = PETSC_TRUE;
3342 
3343   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3344   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3345 
3346   /* Set A as diagonal portion of *mat */
3347   maij->A = A;
3348 
3349   nz = oi[m];
3350   for (i=0; i<nz; i++) {
3351     col   = oj[i];
3352     oj[i] = garray[col];
3353   }
3354 
3355    /* Set Bnew as off-diagonal portion of *mat */
3356   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3357   bnew        = (Mat_SeqAIJ*)Bnew->data;
3358   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3359   maij->B     = Bnew;
3360 
3361   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3362 
3363   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3364   b->free_a       = PETSC_FALSE;
3365   b->free_ij      = PETSC_FALSE;
3366   ierr = MatDestroy(&B);CHKERRQ(ierr);
3367 
3368   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3369   bnew->free_a       = PETSC_TRUE;
3370   bnew->free_ij      = PETSC_TRUE;
3371 
3372   /* condense columns of maij->B */
3373   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3374   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3375   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3376   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3377   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3378   PetscFunctionReturn(0);
3379 }
3380 
3381 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3382 
3383 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3384 {
3385   PetscErrorCode ierr;
3386   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3387   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3388   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3389   Mat            M,Msub,B=a->B;
3390   MatScalar      *aa;
3391   Mat_SeqAIJ     *aij;
3392   PetscInt       *garray = a->garray,*colsub,Ncols;
3393   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3394   IS             iscol_sub,iscmap;
3395   const PetscInt *is_idx,*cmap;
3396   PetscBool      allcolumns=PETSC_FALSE;
3397   MPI_Comm       comm;
3398 
3399   PetscFunctionBegin;
3400   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3401 
3402   if (call == MAT_REUSE_MATRIX) {
3403     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3404     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3405     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3406 
3407     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3408     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3409 
3410     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3411     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3412 
3413     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3414 
3415   } else { /* call == MAT_INITIAL_MATRIX) */
3416     PetscBool flg;
3417 
3418     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3419     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3420 
3421     /* (1) iscol -> nonscalable iscol_local */
3422     /* Check for special case: each processor gets entire matrix columns */
3423     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3424     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3425     if (allcolumns) {
3426       iscol_sub = iscol_local;
3427       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3428       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3429 
3430     } else {
3431       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3432       PetscInt *idx,*cmap1,k;
3433       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3434       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3435       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3436       count = 0;
3437       k     = 0;
3438       for (i=0; i<Ncols; i++) {
3439         j = is_idx[i];
3440         if (j >= cstart && j < cend) {
3441           /* diagonal part of mat */
3442           idx[count]     = j;
3443           cmap1[count++] = i; /* column index in submat */
3444         } else if (Bn) {
3445           /* off-diagonal part of mat */
3446           if (j == garray[k]) {
3447             idx[count]     = j;
3448             cmap1[count++] = i;  /* column index in submat */
3449           } else if (j > garray[k]) {
3450             while (j > garray[k] && k < Bn-1) k++;
3451             if (j == garray[k]) {
3452               idx[count]     = j;
3453               cmap1[count++] = i; /* column index in submat */
3454             }
3455           }
3456         }
3457       }
3458       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3459 
3460       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3461       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3462       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3463 
3464       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3465     }
3466 
3467     /* (3) Create sequential Msub */
3468     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3469   }
3470 
3471   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3472   aij  = (Mat_SeqAIJ*)(Msub)->data;
3473   ii   = aij->i;
3474   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3475 
3476   /*
3477       m - number of local rows
3478       Ncols - number of columns (same on all processors)
3479       rstart - first row in new global matrix generated
3480   */
3481   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3482 
3483   if (call == MAT_INITIAL_MATRIX) {
3484     /* (4) Create parallel newmat */
3485     PetscMPIInt    rank,size;
3486     PetscInt       csize;
3487 
3488     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3489     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3490 
3491     /*
3492         Determine the number of non-zeros in the diagonal and off-diagonal
3493         portions of the matrix in order to do correct preallocation
3494     */
3495 
3496     /* first get start and end of "diagonal" columns */
3497     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3498     if (csize == PETSC_DECIDE) {
3499       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3500       if (mglobal == Ncols) { /* square matrix */
3501         nlocal = m;
3502       } else {
3503         nlocal = Ncols/size + ((Ncols % size) > rank);
3504       }
3505     } else {
3506       nlocal = csize;
3507     }
3508     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3509     rstart = rend - nlocal;
3510     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3511 
3512     /* next, compute all the lengths */
3513     jj    = aij->j;
3514     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3515     olens = dlens + m;
3516     for (i=0; i<m; i++) {
3517       jend = ii[i+1] - ii[i];
3518       olen = 0;
3519       dlen = 0;
3520       for (j=0; j<jend; j++) {
3521         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3522         else dlen++;
3523         jj++;
3524       }
3525       olens[i] = olen;
3526       dlens[i] = dlen;
3527     }
3528 
3529     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3530     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3531 
3532     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3533     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3534     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3535     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3536     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3537     ierr = PetscFree(dlens);CHKERRQ(ierr);
3538 
3539   } else { /* call == MAT_REUSE_MATRIX */
3540     M    = *newmat;
3541     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3542     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3543     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3544     /*
3545          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3546        rather than the slower MatSetValues().
3547     */
3548     M->was_assembled = PETSC_TRUE;
3549     M->assembled     = PETSC_FALSE;
3550   }
3551 
3552   /* (5) Set values of Msub to *newmat */
3553   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3554   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3555 
3556   jj   = aij->j;
3557   aa   = aij->a;
3558   for (i=0; i<m; i++) {
3559     row = rstart + i;
3560     nz  = ii[i+1] - ii[i];
3561     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3562     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3563     jj += nz; aa += nz;
3564   }
3565   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3566 
3567   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3569 
3570   ierr = PetscFree(colsub);CHKERRQ(ierr);
3571 
3572   /* save Msub, iscol_sub and iscmap used in processor for next request */
3573   if (call ==  MAT_INITIAL_MATRIX) {
3574     *newmat = M;
3575     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3576     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3577 
3578     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3579     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3580 
3581     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3582     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3583 
3584     if (iscol_local) {
3585       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3586       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3587     }
3588   }
3589   PetscFunctionReturn(0);
3590 }
3591 
3592 /*
3593     Not great since it makes two copies of the submatrix, first an SeqAIJ
3594   in local and then by concatenating the local matrices the end result.
3595   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3596 
3597   Note: This requires a sequential iscol with all indices.
3598 */
3599 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3600 {
3601   PetscErrorCode ierr;
3602   PetscMPIInt    rank,size;
3603   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3604   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3605   Mat            M,Mreuse;
3606   MatScalar      *aa,*vwork;
3607   MPI_Comm       comm;
3608   Mat_SeqAIJ     *aij;
3609   PetscBool      colflag,allcolumns=PETSC_FALSE;
3610 
3611   PetscFunctionBegin;
3612   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3613   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3614   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3615 
3616   /* Check for special case: each processor gets entire matrix columns */
3617   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3618   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3619   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3620 
3621   if (call ==  MAT_REUSE_MATRIX) {
3622     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3623     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3624     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3625   } else {
3626     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3627   }
3628 
3629   /*
3630       m - number of local rows
3631       n - number of columns (same on all processors)
3632       rstart - first row in new global matrix generated
3633   */
3634   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3635   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3636   if (call == MAT_INITIAL_MATRIX) {
3637     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3638     ii  = aij->i;
3639     jj  = aij->j;
3640 
3641     /*
3642         Determine the number of non-zeros in the diagonal and off-diagonal
3643         portions of the matrix in order to do correct preallocation
3644     */
3645 
3646     /* first get start and end of "diagonal" columns */
3647     if (csize == PETSC_DECIDE) {
3648       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3649       if (mglobal == n) { /* square matrix */
3650         nlocal = m;
3651       } else {
3652         nlocal = n/size + ((n % size) > rank);
3653       }
3654     } else {
3655       nlocal = csize;
3656     }
3657     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3658     rstart = rend - nlocal;
3659     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3660 
3661     /* next, compute all the lengths */
3662     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3663     olens = dlens + m;
3664     for (i=0; i<m; i++) {
3665       jend = ii[i+1] - ii[i];
3666       olen = 0;
3667       dlen = 0;
3668       for (j=0; j<jend; j++) {
3669         if (*jj < rstart || *jj >= rend) olen++;
3670         else dlen++;
3671         jj++;
3672       }
3673       olens[i] = olen;
3674       dlens[i] = dlen;
3675     }
3676     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3677     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3678     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3679     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3680     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3681     ierr = PetscFree(dlens);CHKERRQ(ierr);
3682   } else {
3683     PetscInt ml,nl;
3684 
3685     M    = *newmat;
3686     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3687     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3688     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3689     /*
3690          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3691        rather than the slower MatSetValues().
3692     */
3693     M->was_assembled = PETSC_TRUE;
3694     M->assembled     = PETSC_FALSE;
3695   }
3696   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3697   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3698   ii   = aij->i;
3699   jj   = aij->j;
3700   aa   = aij->a;
3701   for (i=0; i<m; i++) {
3702     row   = rstart + i;
3703     nz    = ii[i+1] - ii[i];
3704     cwork = jj;     jj += nz;
3705     vwork = aa;     aa += nz;
3706     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3707   }
3708 
3709   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3710   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   *newmat = M;
3712 
3713   /* save submatrix used in processor for next request */
3714   if (call ==  MAT_INITIAL_MATRIX) {
3715     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3716     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3717   }
3718   PetscFunctionReturn(0);
3719 }
3720 
3721 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3722 {
3723   PetscInt       m,cstart, cend,j,nnz,i,d;
3724   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3725   const PetscInt *JJ;
3726   PetscScalar    *values;
3727   PetscErrorCode ierr;
3728   PetscBool      nooffprocentries;
3729 
3730   PetscFunctionBegin;
3731   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3732 
3733   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3734   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3735   m      = B->rmap->n;
3736   cstart = B->cmap->rstart;
3737   cend   = B->cmap->rend;
3738   rstart = B->rmap->rstart;
3739 
3740   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3741 
3742 #if defined(PETSC_USE_DEBUGGING)
3743   for (i=0; i<m; i++) {
3744     nnz = Ii[i+1]- Ii[i];
3745     JJ  = J + Ii[i];
3746     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3747     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3748     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3749   }
3750 #endif
3751 
3752   for (i=0; i<m; i++) {
3753     nnz     = Ii[i+1]- Ii[i];
3754     JJ      = J + Ii[i];
3755     nnz_max = PetscMax(nnz_max,nnz);
3756     d       = 0;
3757     for (j=0; j<nnz; j++) {
3758       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3759     }
3760     d_nnz[i] = d;
3761     o_nnz[i] = nnz - d;
3762   }
3763   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3764   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3765 
3766   if (v) values = (PetscScalar*)v;
3767   else {
3768     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3769   }
3770 
3771   for (i=0; i<m; i++) {
3772     ii   = i + rstart;
3773     nnz  = Ii[i+1]- Ii[i];
3774     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3775   }
3776   nooffprocentries    = B->nooffprocentries;
3777   B->nooffprocentries = PETSC_TRUE;
3778   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3779   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3780   B->nooffprocentries = nooffprocentries;
3781 
3782   if (!v) {
3783     ierr = PetscFree(values);CHKERRQ(ierr);
3784   }
3785   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3786   PetscFunctionReturn(0);
3787 }
3788 
3789 /*@
3790    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3791    (the default parallel PETSc format).
3792 
3793    Collective on MPI_Comm
3794 
3795    Input Parameters:
3796 +  B - the matrix
3797 .  i - the indices into j for the start of each local row (starts with zero)
3798 .  j - the column indices for each local row (starts with zero)
3799 -  v - optional values in the matrix
3800 
3801    Level: developer
3802 
3803    Notes:
3804        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3805      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3806      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3807 
3808        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3809 
3810        The format which is used for the sparse matrix input, is equivalent to a
3811     row-major ordering.. i.e for the following matrix, the input data expected is
3812     as shown
3813 
3814 $        1 0 0
3815 $        2 0 3     P0
3816 $       -------
3817 $        4 5 6     P1
3818 $
3819 $     Process0 [P0]: rows_owned=[0,1]
3820 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3821 $        j =  {0,0,2}  [size = 3]
3822 $        v =  {1,2,3}  [size = 3]
3823 $
3824 $     Process1 [P1]: rows_owned=[2]
3825 $        i =  {0,3}    [size = nrow+1  = 1+1]
3826 $        j =  {0,1,2}  [size = 3]
3827 $        v =  {4,5,6}  [size = 3]
3828 
3829 .keywords: matrix, aij, compressed row, sparse, parallel
3830 
3831 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3832           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3833 @*/
3834 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3835 {
3836   PetscErrorCode ierr;
3837 
3838   PetscFunctionBegin;
3839   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
3842 
3843 /*@C
3844    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3845    (the default parallel PETSc format).  For good matrix assembly performance
3846    the user should preallocate the matrix storage by setting the parameters
3847    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3848    performance can be increased by more than a factor of 50.
3849 
3850    Collective on MPI_Comm
3851 
3852    Input Parameters:
3853 +  B - the matrix
3854 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3855            (same value is used for all local rows)
3856 .  d_nnz - array containing the number of nonzeros in the various rows of the
3857            DIAGONAL portion of the local submatrix (possibly different for each row)
3858            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3859            The size of this array is equal to the number of local rows, i.e 'm'.
3860            For matrices that will be factored, you must leave room for (and set)
3861            the diagonal entry even if it is zero.
3862 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3863            submatrix (same value is used for all local rows).
3864 -  o_nnz - array containing the number of nonzeros in the various rows of the
3865            OFF-DIAGONAL portion of the local submatrix (possibly different for
3866            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3867            structure. The size of this array is equal to the number
3868            of local rows, i.e 'm'.
3869 
3870    If the *_nnz parameter is given then the *_nz parameter is ignored
3871 
3872    The AIJ format (also called the Yale sparse matrix format or
3873    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3874    storage.  The stored row and column indices begin with zero.
3875    See Users-Manual: ch_mat for details.
3876 
3877    The parallel matrix is partitioned such that the first m0 rows belong to
3878    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3879    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3880 
3881    The DIAGONAL portion of the local submatrix of a processor can be defined
3882    as the submatrix which is obtained by extracting the part corresponding to
3883    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3884    first row that belongs to the processor, r2 is the last row belonging to
3885    this processor, and c1-c2 is the range of indices of the local part of a
3886    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3887    common case of a square matrix, the row and column ranges are the same and
3888    the DIAGONAL part is also square. The remaining portion of the local
3889    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3890 
3891    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3892 
3893    You can call MatGetInfo() to get information on how effective the preallocation was;
3894    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3895    You can also run with the option -info and look for messages with the string
3896    malloc in them to see if additional memory allocation was needed.
3897 
3898    Example usage:
3899 
3900    Consider the following 8x8 matrix with 34 non-zero values, that is
3901    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3902    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3903    as follows:
3904 
3905 .vb
3906             1  2  0  |  0  3  0  |  0  4
3907     Proc0   0  5  6  |  7  0  0  |  8  0
3908             9  0 10  | 11  0  0  | 12  0
3909     -------------------------------------
3910            13  0 14  | 15 16 17  |  0  0
3911     Proc1   0 18  0  | 19 20 21  |  0  0
3912             0  0  0  | 22 23  0  | 24  0
3913     -------------------------------------
3914     Proc2  25 26 27  |  0  0 28  | 29  0
3915            30  0  0  | 31 32 33  |  0 34
3916 .ve
3917 
3918    This can be represented as a collection of submatrices as:
3919 
3920 .vb
3921       A B C
3922       D E F
3923       G H I
3924 .ve
3925 
3926    Where the submatrices A,B,C are owned by proc0, D,E,F are
3927    owned by proc1, G,H,I are owned by proc2.
3928 
3929    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3930    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3931    The 'M','N' parameters are 8,8, and have the same values on all procs.
3932 
3933    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3934    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3935    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3936    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3937    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3938    matrix, and [DF] as another SeqAIJ matrix.
3939 
3940    When d_nz, o_nz parameters are specified, d_nz storage elements are
3941    allocated for every row of the local diagonal submatrix, and o_nz
3942    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3943    One way to choose d_nz and o_nz is to use the max nonzeros per local
3944    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3945    In this case, the values of d_nz,o_nz are:
3946 .vb
3947      proc0 : dnz = 2, o_nz = 2
3948      proc1 : dnz = 3, o_nz = 2
3949      proc2 : dnz = 1, o_nz = 4
3950 .ve
3951    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3952    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3953    for proc2. i.e we are using 12+15+10=37 storage locations to store
3954    34 values.
3955 
3956    When d_nnz, o_nnz parameters are specified, the storage is specified
3957    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3958    In the above case the values for d_nnz,o_nnz are:
3959 .vb
3960      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3961      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3962      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3963 .ve
3964    Here the space allocated is sum of all the above values i.e 34, and
3965    hence pre-allocation is perfect.
3966 
3967    Level: intermediate
3968 
3969 .keywords: matrix, aij, compressed row, sparse, parallel
3970 
3971 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3972           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3973 @*/
3974 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3975 {
3976   PetscErrorCode ierr;
3977 
3978   PetscFunctionBegin;
3979   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3980   PetscValidType(B,1);
3981   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3982   PetscFunctionReturn(0);
3983 }
3984 
3985 /*@
3986      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3987          CSR format the local rows.
3988 
3989    Collective on MPI_Comm
3990 
3991    Input Parameters:
3992 +  comm - MPI communicator
3993 .  m - number of local rows (Cannot be PETSC_DECIDE)
3994 .  n - This value should be the same as the local size used in creating the
3995        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3996        calculated if N is given) For square matrices n is almost always m.
3997 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3998 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3999 .   i - row indices
4000 .   j - column indices
4001 -   a - matrix values
4002 
4003    Output Parameter:
4004 .   mat - the matrix
4005 
4006    Level: intermediate
4007 
4008    Notes:
4009        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4010      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4011      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4012 
4013        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4014 
4015        The format which is used for the sparse matrix input, is equivalent to a
4016     row-major ordering.. i.e for the following matrix, the input data expected is
4017     as shown
4018 
4019 $        1 0 0
4020 $        2 0 3     P0
4021 $       -------
4022 $        4 5 6     P1
4023 $
4024 $     Process0 [P0]: rows_owned=[0,1]
4025 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4026 $        j =  {0,0,2}  [size = 3]
4027 $        v =  {1,2,3}  [size = 3]
4028 $
4029 $     Process1 [P1]: rows_owned=[2]
4030 $        i =  {0,3}    [size = nrow+1  = 1+1]
4031 $        j =  {0,1,2}  [size = 3]
4032 $        v =  {4,5,6}  [size = 3]
4033 
4034 .keywords: matrix, aij, compressed row, sparse, parallel
4035 
4036 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4037           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4038 @*/
4039 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4040 {
4041   PetscErrorCode ierr;
4042 
4043   PetscFunctionBegin;
4044   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4045   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4046   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4047   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4048   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4049   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4050   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4051   PetscFunctionReturn(0);
4052 }
4053 
4054 /*@C
4055    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4056    (the default parallel PETSc format).  For good matrix assembly performance
4057    the user should preallocate the matrix storage by setting the parameters
4058    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4059    performance can be increased by more than a factor of 50.
4060 
4061    Collective on MPI_Comm
4062 
4063    Input Parameters:
4064 +  comm - MPI communicator
4065 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4066            This value should be the same as the local size used in creating the
4067            y vector for the matrix-vector product y = Ax.
4068 .  n - This value should be the same as the local size used in creating the
4069        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4070        calculated if N is given) For square matrices n is almost always m.
4071 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4072 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4073 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4074            (same value is used for all local rows)
4075 .  d_nnz - array containing the number of nonzeros in the various rows of the
4076            DIAGONAL portion of the local submatrix (possibly different for each row)
4077            or NULL, if d_nz is used to specify the nonzero structure.
4078            The size of this array is equal to the number of local rows, i.e 'm'.
4079 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4080            submatrix (same value is used for all local rows).
4081 -  o_nnz - array containing the number of nonzeros in the various rows of the
4082            OFF-DIAGONAL portion of the local submatrix (possibly different for
4083            each row) or NULL, if o_nz is used to specify the nonzero
4084            structure. The size of this array is equal to the number
4085            of local rows, i.e 'm'.
4086 
4087    Output Parameter:
4088 .  A - the matrix
4089 
4090    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4091    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4092    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4093 
4094    Notes:
4095    If the *_nnz parameter is given then the *_nz parameter is ignored
4096 
4097    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4098    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4099    storage requirements for this matrix.
4100 
4101    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4102    processor then it must be used on all processors that share the object for
4103    that argument.
4104 
4105    The user MUST specify either the local or global matrix dimensions
4106    (possibly both).
4107 
4108    The parallel matrix is partitioned across processors such that the
4109    first m0 rows belong to process 0, the next m1 rows belong to
4110    process 1, the next m2 rows belong to process 2 etc.. where
4111    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4112    values corresponding to [m x N] submatrix.
4113 
4114    The columns are logically partitioned with the n0 columns belonging
4115    to 0th partition, the next n1 columns belonging to the next
4116    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4117 
4118    The DIAGONAL portion of the local submatrix on any given processor
4119    is the submatrix corresponding to the rows and columns m,n
4120    corresponding to the given processor. i.e diagonal matrix on
4121    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4122    etc. The remaining portion of the local submatrix [m x (N-n)]
4123    constitute the OFF-DIAGONAL portion. The example below better
4124    illustrates this concept.
4125 
4126    For a square global matrix we define each processor's diagonal portion
4127    to be its local rows and the corresponding columns (a square submatrix);
4128    each processor's off-diagonal portion encompasses the remainder of the
4129    local matrix (a rectangular submatrix).
4130 
4131    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4132 
4133    When calling this routine with a single process communicator, a matrix of
4134    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4135    type of communicator, use the construction mechanism
4136 .vb
4137      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4138 .ve
4139 
4140 $     MatCreate(...,&A);
4141 $     MatSetType(A,MATMPIAIJ);
4142 $     MatSetSizes(A, m,n,M,N);
4143 $     MatMPIAIJSetPreallocation(A,...);
4144 
4145    By default, this format uses inodes (identical nodes) when possible.
4146    We search for consecutive rows with the same nonzero structure, thereby
4147    reusing matrix information to achieve increased efficiency.
4148 
4149    Options Database Keys:
4150 +  -mat_no_inode  - Do not use inodes
4151 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4152 -  -mat_aij_oneindex - Internally use indexing starting at 1
4153         rather than 0.  Note that when calling MatSetValues(),
4154         the user still MUST index entries starting at 0!
4155 
4156 
4157    Example usage:
4158 
4159    Consider the following 8x8 matrix with 34 non-zero values, that is
4160    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4161    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4162    as follows
4163 
4164 .vb
4165             1  2  0  |  0  3  0  |  0  4
4166     Proc0   0  5  6  |  7  0  0  |  8  0
4167             9  0 10  | 11  0  0  | 12  0
4168     -------------------------------------
4169            13  0 14  | 15 16 17  |  0  0
4170     Proc1   0 18  0  | 19 20 21  |  0  0
4171             0  0  0  | 22 23  0  | 24  0
4172     -------------------------------------
4173     Proc2  25 26 27  |  0  0 28  | 29  0
4174            30  0  0  | 31 32 33  |  0 34
4175 .ve
4176 
4177    This can be represented as a collection of submatrices as
4178 
4179 .vb
4180       A B C
4181       D E F
4182       G H I
4183 .ve
4184 
4185    Where the submatrices A,B,C are owned by proc0, D,E,F are
4186    owned by proc1, G,H,I are owned by proc2.
4187 
4188    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4189    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4190    The 'M','N' parameters are 8,8, and have the same values on all procs.
4191 
4192    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4193    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4194    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4195    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4196    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4197    matrix, and [DF] as another SeqAIJ matrix.
4198 
4199    When d_nz, o_nz parameters are specified, d_nz storage elements are
4200    allocated for every row of the local diagonal submatrix, and o_nz
4201    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4202    One way to choose d_nz and o_nz is to use the max nonzeros per local
4203    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4204    In this case, the values of d_nz,o_nz are
4205 .vb
4206      proc0 : dnz = 2, o_nz = 2
4207      proc1 : dnz = 3, o_nz = 2
4208      proc2 : dnz = 1, o_nz = 4
4209 .ve
4210    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4211    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4212    for proc2. i.e we are using 12+15+10=37 storage locations to store
4213    34 values.
4214 
4215    When d_nnz, o_nnz parameters are specified, the storage is specified
4216    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4217    In the above case the values for d_nnz,o_nnz are
4218 .vb
4219      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4220      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4221      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4222 .ve
4223    Here the space allocated is sum of all the above values i.e 34, and
4224    hence pre-allocation is perfect.
4225 
4226    Level: intermediate
4227 
4228 .keywords: matrix, aij, compressed row, sparse, parallel
4229 
4230 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4231           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4232 @*/
4233 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4234 {
4235   PetscErrorCode ierr;
4236   PetscMPIInt    size;
4237 
4238   PetscFunctionBegin;
4239   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4240   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4241   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4242   if (size > 1) {
4243     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4244     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4245   } else {
4246     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4247     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4248   }
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4253 {
4254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4255   PetscBool      flg;
4256   PetscErrorCode ierr;
4257 
4258   PetscFunctionBegin;
4259   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4260   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4261   if (Ad)     *Ad     = a->A;
4262   if (Ao)     *Ao     = a->B;
4263   if (colmap) *colmap = a->garray;
4264   PetscFunctionReturn(0);
4265 }
4266 
4267 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4268 {
4269   PetscErrorCode ierr;
4270   PetscInt       m,N,i,rstart,nnz,Ii;
4271   PetscInt       *indx;
4272   PetscScalar    *values;
4273 
4274   PetscFunctionBegin;
4275   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4276   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4277     PetscInt       *dnz,*onz,sum,bs,cbs;
4278 
4279     if (n == PETSC_DECIDE) {
4280       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4281     }
4282     /* Check sum(n) = N */
4283     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4284     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4285 
4286     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4287     rstart -= m;
4288 
4289     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4290     for (i=0; i<m; i++) {
4291       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4292       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4293       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4294     }
4295 
4296     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4297     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4298     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4299     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4300     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4301     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4302     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4303     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4304   }
4305 
4306   /* numeric phase */
4307   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4308   for (i=0; i<m; i++) {
4309     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4310     Ii   = i + rstart;
4311     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4312     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4313   }
4314   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4315   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4316   PetscFunctionReturn(0);
4317 }
4318 
4319 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4320 {
4321   PetscErrorCode    ierr;
4322   PetscMPIInt       rank;
4323   PetscInt          m,N,i,rstart,nnz;
4324   size_t            len;
4325   const PetscInt    *indx;
4326   PetscViewer       out;
4327   char              *name;
4328   Mat               B;
4329   const PetscScalar *values;
4330 
4331   PetscFunctionBegin;
4332   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4333   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4334   /* Should this be the type of the diagonal block of A? */
4335   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4336   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4337   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4338   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4339   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4340   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4341   for (i=0; i<m; i++) {
4342     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4343     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4344     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4345   }
4346   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4347   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4348 
4349   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4350   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4351   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4352   sprintf(name,"%s.%d",outfile,rank);
4353   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4354   ierr = PetscFree(name);CHKERRQ(ierr);
4355   ierr = MatView(B,out);CHKERRQ(ierr);
4356   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4357   ierr = MatDestroy(&B);CHKERRQ(ierr);
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4362 {
4363   PetscErrorCode      ierr;
4364   Mat_Merge_SeqsToMPI *merge;
4365   PetscContainer      container;
4366 
4367   PetscFunctionBegin;
4368   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4369   if (container) {
4370     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4371     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4372     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4373     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4374     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4375     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4376     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4377     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4378     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4379     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4380     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4381     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4382     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4383     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4384     ierr = PetscFree(merge);CHKERRQ(ierr);
4385     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4386   }
4387   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4388   PetscFunctionReturn(0);
4389 }
4390 
4391 #include <../src/mat/utils/freespace.h>
4392 #include <petscbt.h>
4393 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of summing the per-process sequential
   matrices into a parallel matrix.

   Input Parameters:
+  seqmat - this process's sequential matrix (its data is read as Mat_SeqAIJ)
-  mpimat - the parallel matrix; it must carry the "MatMergeSeqsToMPI" container that
            MatCreateMPIAIJSumSeqAIJSymbolic() attaches

   Every process sends the values of the seqmat rows that belong to other processes
   (according to merge->rowmap), receives the values the other processes hold for its own
   rows, sums everything one local row at a time and inserts that row into mpimat with
   MatSetValues(). mpimat is assembled before the routine returns.
*/
4394 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4395 {
4396   PetscErrorCode      ierr;
4397   MPI_Comm            comm;
4398   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4399   PetscMPIInt         size,rank,taga,*len_s;
4400   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4401   PetscInt            proc,m;
4402   PetscInt            **buf_ri,**buf_rj;
4403   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4404   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4405   MPI_Request         *s_waits,*r_waits;
4406   MPI_Status          *status;
4407   MatScalar           *aa=a->a;
4408   MatScalar           **abuf_r,*ba_i;
4409   Mat_Merge_SeqsToMPI *merge;
4410   PetscContainer      container;
4411 
4412   PetscFunctionBegin;
4413   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4414   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4415 
4416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4417   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4418 
  /* fetch the data prepared by the symbolic phase */
4419   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4420   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4421 
4422   bi     = merge->bi;
4423   bj     = merge->bj;
4424   buf_ri = merge->buf_ri;
4425   buf_rj = merge->buf_rj;
4426 
4427   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr); /* one status array, shared by both MPI_Waitall() calls below */
4428   owners = merge->rowmap->range;
4429   len_s  = merge->len_s;
4430 
4431   /* send and recv matrix values */
4432   /*-----------------------------*/
4433   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4434   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4435 
4436   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4437   for (proc=0,k=0; proc<size; proc++) {
4438     if (!len_s[proc]) continue;
4439     i    = owners[proc];
    /* the len_s[proc] values sent to proc are taken contiguously from aa, starting at the first row proc owns */
4440     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4441     k++;
4442   }
4443 
4444   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4445   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4446   ierr = PetscFree(status);CHKERRQ(ierr);
4447 
4448   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4449   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4450 
4451   /* insert mat values of mpimat */
4452   /*----------------------------*/
4453   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); /* work array holding the values of one row of mpimat */
4454   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4455 
  /* set up one pair of cursors per received message: next row index, and that row's i-structure entry */
4456   for (k=0; k<merge->nrecv; k++) {
4457     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4458     nrows       = *(buf_ri_k[k]);
4459     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4460     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4461   }
4462 
4463   /* set values of ba */
4464   m = merge->rowmap->n;
4465   for (i=0; i<m; i++) {
4466     arow = owners[rank] + i; /* global index of local row i; also the row of seqmat read below */
4467     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4468     bnzi = bi[i+1] - bi[i];
4469     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4470 
4471     /* add local non-zero vals of this proc's seqmat into ba */
4472     anzi   = ai[arow+1] - ai[arow];
4473     aj     = a->j + ai[arow];
4474     aa     = a->a + ai[arow];
4475     nextaj = 0;
    /* walk the merged column list bj_i; aj only advances on a match. There is no bound on j here:
       NOTE(review): this presumably relies on the symbolic phase having put every column of aj into bj_i - confirm */
4476     for (j=0; nextaj<anzi; j++) {
4477       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4478         ba_i[j] += aa[nextaj++];
4479       }
4480     }
4481 
4482     /* add received vals into ba */
4483     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4484       /* i-th row */
4485       if (i == *nextrow[k]) {
4486         anzi   = *(nextai[k]+1) - *nextai[k];
4487         aj     = buf_rj[k] + *(nextai[k]);
4488         aa     = abuf_r[k] + *(nextai[k]);
4489         nextaj = 0;
4490         for (j=0; nextaj<anzi; j++) {
4491           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4492             ba_i[j] += aa[nextaj++];
4493           }
4494         }
4495         nextrow[k]++; nextai[k]++; /* message k is done with this row; move both cursors to its next row */
4496       }
4497     }
4498     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4499   }
4500   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4502 
  /* NOTE(review): presumably all receive buffers live in the allocation anchored at abuf_r[0] - confirm against PetscPostIrecvScalar() */
4503   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4504   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4505   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4506   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4507   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4508   PetscFunctionReturn(0);
4509 }
4510 
4511 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4512 {
4513   PetscErrorCode      ierr;
4514   Mat                 B_mpi;
4515   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4516   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4517   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4518   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4519   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4520   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4521   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4522   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4523   MPI_Status          *status;
4524   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4525   PetscBT             lnkbt;
4526   Mat_Merge_SeqsToMPI *merge;
4527   PetscContainer      container;
4528 
4529   PetscFunctionBegin;
4530   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4531 
4532   /* make sure it is a PETSc comm */
4533   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4536 
4537   ierr = PetscNew(&merge);CHKERRQ(ierr);
4538   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4539 
4540   /* determine row ownership */
4541   /*---------------------------------------------------------*/
4542   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4543   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4544   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4545   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4546   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4547   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4548   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4549 
4550   m      = merge->rowmap->n;
4551   owners = merge->rowmap->range;
4552 
4553   /* determine the number of messages to send, their lengths */
4554   /*---------------------------------------------------------*/
4555   len_s = merge->len_s;
4556 
4557   len          = 0; /* length of buf_si[] */
4558   merge->nsend = 0;
4559   for (proc=0; proc<size; proc++) {
4560     len_si[proc] = 0;
4561     if (proc == rank) {
4562       len_s[proc] = 0;
4563     } else {
4564       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4565       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4566     }
4567     if (len_s[proc]) {
4568       merge->nsend++;
4569       nrows = 0;
4570       for (i=owners[proc]; i<owners[proc+1]; i++) {
4571         if (ai[i+1] > ai[i]) nrows++;
4572       }
4573       len_si[proc] = 2*(nrows+1);
4574       len         += len_si[proc];
4575     }
4576   }
4577 
4578   /* determine the number and length of messages to receive for ij-structure */
4579   /*-------------------------------------------------------------------------*/
4580   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4581   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4582 
4583   /* post the Irecv of j-structure */
4584   /*-------------------------------*/
4585   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4586   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4587 
4588   /* post the Isend of j-structure */
4589   /*--------------------------------*/
4590   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4591 
4592   for (proc=0, k=0; proc<size; proc++) {
4593     if (!len_s[proc]) continue;
4594     i    = owners[proc];
4595     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4596     k++;
4597   }
4598 
4599   /* receives and sends of j-structure are complete */
4600   /*------------------------------------------------*/
4601   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4602   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4603 
4604   /* send and recv i-structure */
4605   /*---------------------------*/
4606   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4607   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4608 
4609   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4610   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4611   for (proc=0,k=0; proc<size; proc++) {
4612     if (!len_s[proc]) continue;
4613     /* form outgoing message for i-structure:
4614          buf_si[0]:                 nrows to be sent
4615                [1:nrows]:           row index (global)
4616                [nrows+1:2*nrows+1]: i-structure index
4617     */
4618     /*-------------------------------------------*/
4619     nrows       = len_si[proc]/2 - 1;
4620     buf_si_i    = buf_si + nrows+1;
4621     buf_si[0]   = nrows;
4622     buf_si_i[0] = 0;
4623     nrows       = 0;
4624     for (i=owners[proc]; i<owners[proc+1]; i++) {
4625       anzi = ai[i+1] - ai[i];
4626       if (anzi) {
4627         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4628         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4629         nrows++;
4630       }
4631     }
4632     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4633     k++;
4634     buf_si += len_si[proc];
4635   }
4636 
4637   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4638   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4639 
4640   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4641   for (i=0; i<merge->nrecv; i++) {
4642     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4643   }
4644 
4645   ierr = PetscFree(len_si);CHKERRQ(ierr);
4646   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4647   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4648   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4649   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4650   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4651   ierr = PetscFree(status);CHKERRQ(ierr);
4652 
4653   /* compute a local seq matrix in each processor */
4654   /*----------------------------------------------*/
4655   /* allocate bi array and free space for accumulating nonzero column info */
4656   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4657   bi[0] = 0;
4658 
4659   /* create and initialize a linked list */
4660   nlnk = N+1;
4661   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4662 
4663   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4664   len  = ai[owners[rank+1]] - ai[owners[rank]];
4665   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4666 
4667   current_space = free_space;
4668 
4669   /* determine symbolic info for each local row */
4670   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4671 
4672   for (k=0; k<merge->nrecv; k++) {
4673     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4674     nrows       = *buf_ri_k[k];
4675     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4676     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4677   }
4678 
4679   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4680   len  = 0;
4681   for (i=0; i<m; i++) {
4682     bnzi = 0;
4683     /* add local non-zero cols of this proc's seqmat into lnk */
4684     arow  = owners[rank] + i;
4685     anzi  = ai[arow+1] - ai[arow];
4686     aj    = a->j + ai[arow];
4687     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4688     bnzi += nlnk;
4689     /* add received col data into lnk */
4690     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4691       if (i == *nextrow[k]) { /* i-th row */
4692         anzi  = *(nextai[k]+1) - *nextai[k];
4693         aj    = buf_rj[k] + *nextai[k];
4694         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4695         bnzi += nlnk;
4696         nextrow[k]++; nextai[k]++;
4697       }
4698     }
4699     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4700 
4701     /* if free space is not available, make more free space */
4702     if (current_space->local_remaining<bnzi) {
4703       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4704       nspacedouble++;
4705     }
4706     /* copy data into free space, then initialize lnk */
4707     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4708     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4709 
4710     current_space->array           += bnzi;
4711     current_space->local_used      += bnzi;
4712     current_space->local_remaining -= bnzi;
4713 
4714     bi[i+1] = bi[i] + bnzi;
4715   }
4716 
4717   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4718 
4719   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4720   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4721   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4722 
4723   /* create symbolic parallel matrix B_mpi */
4724   /*---------------------------------------*/
4725   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4726   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4727   if (n==PETSC_DECIDE) {
4728     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4729   } else {
4730     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4731   }
4732   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4733   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4734   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4735   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4736   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4737 
4738   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4739   B_mpi->assembled    = PETSC_FALSE;
4740   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4741   merge->bi           = bi;
4742   merge->bj           = bj;
4743   merge->buf_ri       = buf_ri;
4744   merge->buf_rj       = buf_rj;
4745   merge->coi          = NULL;
4746   merge->coj          = NULL;
4747   merge->owners_co    = NULL;
4748 
4749   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4750 
4751   /* attach the supporting struct to B_mpi for reuse */
4752   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4753   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4754   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4755   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4756   *mpimat = B_mpi;
4757 
4758   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4759   PetscFunctionReturn(0);
4760 }
4761 
4762 /*@C
4763       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4764                  matrices from each processor
4765 
4766     Collective on MPI_Comm
4767 
4768    Input Parameters:
4769 +    comm - the communicators the parallel matrix will live on
4770 .    seqmat - the input sequential matrices
4771 .    m - number of local rows (or PETSC_DECIDE)
4772 .    n - number of local columns (or PETSC_DECIDE)
4773 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4774 
4775    Output Parameter:
4776 .    mpimat - the parallel matrix generated
4777 
4778     Level: advanced
4779 
4780    Notes:
4781      The dimensions of the sequential matrix in each processor MUST be the same.
4782      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4783      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4784 @*/
4785 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4786 {
4787   PetscErrorCode ierr;
4788   PetscMPIInt    size;
4789 
4790   PetscFunctionBegin;
4791   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4792   if (size == 1) {
4793     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4794     if (scall == MAT_INITIAL_MATRIX) {
4795       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4796     } else {
4797       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4798     }
4799     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4800     PetscFunctionReturn(0);
4801   }
4802   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4803   if (scall == MAT_INITIAL_MATRIX) {
4804     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4805   }
4806   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4807   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4808   PetscFunctionReturn(0);
4809 }
4810 
4811 /*@
4812      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4813           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4814           with MatGetSize()
4815 
4816     Not Collective
4817 
4818    Input Parameters:
4819 +    A - the matrix
4820 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4821 
4822    Output Parameter:
4823 .    A_loc - the local sequential matrix generated
4824 
4825     Level: developer
4826 
4827 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4828 
4829 @*/
4830 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4831 {
4832   PetscErrorCode ierr;
4833   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4834   Mat_SeqAIJ     *mat,*a,*b;
4835   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4836   MatScalar      *aa,*ba,*cam;
4837   PetscScalar    *ca;
4838   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4839   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4840   PetscBool      match;
4841   MPI_Comm       comm;
4842   PetscMPIInt    size;
4843 
4844   PetscFunctionBegin;
4845   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4846   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4847   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4848   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4849   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4850 
4851   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4852   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4853   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4854   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4855   aa = a->a; ba = b->a;
4856   if (scall == MAT_INITIAL_MATRIX) {
4857     if (size == 1) {
4858       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4859       PetscFunctionReturn(0);
4860     }
4861 
4862     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4863     ci[0] = 0;
4864     for (i=0; i<am; i++) {
4865       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4866     }
4867     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4868     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4869     k    = 0;
4870     for (i=0; i<am; i++) {
4871       ncols_o = bi[i+1] - bi[i];
4872       ncols_d = ai[i+1] - ai[i];
4873       /* off-diagonal portion of A */
4874       for (jo=0; jo<ncols_o; jo++) {
4875         col = cmap[*bj];
4876         if (col >= cstart) break;
4877         cj[k]   = col; bj++;
4878         ca[k++] = *ba++;
4879       }
4880       /* diagonal portion of A */
4881       for (j=0; j<ncols_d; j++) {
4882         cj[k]   = cstart + *aj++;
4883         ca[k++] = *aa++;
4884       }
4885       /* off-diagonal portion of A */
4886       for (j=jo; j<ncols_o; j++) {
4887         cj[k]   = cmap[*bj++];
4888         ca[k++] = *ba++;
4889       }
4890     }
4891     /* put together the new matrix */
4892     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4893     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4894     /* Since these are PETSc arrays, change flags to free them as necessary. */
4895     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4896     mat->free_a  = PETSC_TRUE;
4897     mat->free_ij = PETSC_TRUE;
4898     mat->nonew   = 0;
4899   } else if (scall == MAT_REUSE_MATRIX) {
4900     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4901     ci = mat->i; cj = mat->j; cam = mat->a;
4902     for (i=0; i<am; i++) {
4903       /* off-diagonal portion of A */
4904       ncols_o = bi[i+1] - bi[i];
4905       for (jo=0; jo<ncols_o; jo++) {
4906         col = cmap[*bj];
4907         if (col >= cstart) break;
4908         *cam++ = *ba++; bj++;
4909       }
4910       /* diagonal portion of A */
4911       ncols_d = ai[i+1] - ai[i];
4912       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4913       /* off-diagonal portion of A */
4914       for (j=jo; j<ncols_o; j++) {
4915         *cam++ = *ba++; bj++;
4916       }
4917     }
4918   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4919   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4920   PetscFunctionReturn(0);
4921 }
4922 
4923 /*@C
4924      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4925 
4926     Not Collective
4927 
4928    Input Parameters:
4929 +    A - the matrix
4930 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4931 -    row, col - index sets of rows and columns to extract (or NULL)
4932 
4933    Output Parameter:
4934 .    A_loc - the local sequential matrix generated
4935 
4936     Level: developer
4937 
4938 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4939 
4940 @*/
4941 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4942 {
4943   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4944   PetscErrorCode ierr;
4945   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4946   IS             isrowa,iscola;
4947   Mat            *aloc;
4948   PetscBool      match;
4949 
4950   PetscFunctionBegin;
4951   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4952   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4953   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4954   if (!row) {
4955     start = A->rmap->rstart; end = A->rmap->rend;
4956     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4957   } else {
4958     isrowa = *row;
4959   }
4960   if (!col) {
4961     start = A->cmap->rstart;
4962     cmap  = a->garray;
4963     nzA   = a->A->cmap->n;
4964     nzB   = a->B->cmap->n;
4965     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4966     ncols = 0;
4967     for (i=0; i<nzB; i++) {
4968       if (cmap[i] < start) idx[ncols++] = cmap[i];
4969       else break;
4970     }
4971     imark = i;
4972     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4973     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4974     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4975   } else {
4976     iscola = *col;
4977   }
4978   if (scall != MAT_INITIAL_MATRIX) {
4979     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4980     aloc[0] = *A_loc;
4981   }
4982   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4983   *A_loc = aloc[0];
4984   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4985   if (!row) {
4986     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4987   }
4988   if (!col) {
4989     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4990   }
4991   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 /*@C
4996     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4997 
4998     Collective on Mat
4999 
5000    Input Parameters:
5001 +    A,B - the matrices in mpiaij format
5002 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5003 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5004 
5005    Output Parameter:
5006 +    rowb, colb - index sets of rows and columns of B to extract
5007 -    B_seq - the sequential matrix generated
5008 
5009     Level: developer
5010 
5011 @*/
5012 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5013 {
5014   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5015   PetscErrorCode ierr;
5016   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5017   IS             isrowb,iscolb;
5018   Mat            *bseq=NULL;
5019 
5020   PetscFunctionBegin;
5021   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5022     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5023   }
5024   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5025 
5026   if (scall == MAT_INITIAL_MATRIX) {
5027     start = A->cmap->rstart;
5028     cmap  = a->garray;
5029     nzA   = a->A->cmap->n;
5030     nzB   = a->B->cmap->n;
5031     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5032     ncols = 0;
5033     for (i=0; i<nzB; i++) {  /* row < local row index */
5034       if (cmap[i] < start) idx[ncols++] = cmap[i];
5035       else break;
5036     }
5037     imark = i;
5038     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5039     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5040     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5041     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5042   } else {
5043     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5044     isrowb  = *rowb; iscolb = *colb;
5045     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5046     bseq[0] = *B_seq;
5047   }
5048   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5049   *B_seq = bseq[0];
5050   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5051   if (!rowb) {
5052     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5053   } else {
5054     *rowb = isrowb;
5055   }
5056   if (!colb) {
5057     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5058   } else {
5059     *colb = iscolb;
5060   }
5061   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
5064 
5065 /*
5066     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5067     of the OFF-DIAGONAL portion of local A
5068 
5069     Collective on Mat
5070 
5071    Input Parameters:
5072 +    A,B - the matrices in mpiaij format
5073 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5074 
5075    Output Parameter:
5076 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5077 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5078 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5079 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5080 
5081     Level: developer
5082 
5083 */
5084 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5085 {
5086   VecScatter_MPI_General *gen_to,*gen_from;
5087   PetscErrorCode         ierr;
5088   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5089   Mat_SeqAIJ             *b_oth;
5090   VecScatter             ctx =a->Mvctx;
5091   MPI_Comm               comm;
5092   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5093   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5094   PetscInt               *rvalues,*svalues;
5095   MatScalar              *b_otha,*bufa,*bufA;
5096   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5097   MPI_Request            *rwaits = NULL,*swaits = NULL;
5098   MPI_Status             *sstatus,rstatus;
5099   PetscMPIInt            jj,size;
5100   PetscInt               *cols,sbs,rbs;
5101   PetscScalar            *vals;
5102 
5103   PetscFunctionBegin;
5104   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5105   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5106 
5107   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5108     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5109   }
5110   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5111   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5112 
5113   if (size == 1) {
5114     startsj_s = NULL;
5115     bufa_ptr  = NULL;
5116     *B_oth    = NULL;
5117     PetscFunctionReturn(0);
5118   }
5119 
5120   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5121   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5122   nrecvs   = gen_from->n;
5123   nsends   = gen_to->n;
5124 
5125   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5126   srow    = gen_to->indices;    /* local row index to be sent */
5127   sstarts = gen_to->starts;
5128   sprocs  = gen_to->procs;
5129   sstatus = gen_to->sstatus;
5130   sbs     = gen_to->bs;
5131   rstarts = gen_from->starts;
5132   rprocs  = gen_from->procs;
5133   rbs     = gen_from->bs;
5134 
5135   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5136   if (scall == MAT_INITIAL_MATRIX) {
5137     /* i-array */
5138     /*---------*/
5139     /*  post receives */
5140     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5141     for (i=0; i<nrecvs; i++) {
5142       rowlen = rvalues + rstarts[i]*rbs;
5143       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5144       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5145     }
5146 
5147     /* pack the outgoing message */
5148     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5149 
5150     sstartsj[0] = 0;
5151     rstartsj[0] = 0;
5152     len         = 0; /* total length of j or a array to be sent */
5153     k           = 0;
5154     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5155     for (i=0; i<nsends; i++) {
5156       rowlen = svalues + sstarts[i]*sbs;
5157       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5158       for (j=0; j<nrows; j++) {
5159         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5160         for (l=0; l<sbs; l++) {
5161           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5162 
5163           rowlen[j*sbs+l] = ncols;
5164 
5165           len += ncols;
5166           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5167         }
5168         k++;
5169       }
5170       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5171 
5172       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5173     }
5174     /* recvs and sends of i-array are completed */
5175     i = nrecvs;
5176     while (i--) {
5177       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5178     }
5179     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5180     ierr = PetscFree(svalues);CHKERRQ(ierr);
5181 
5182     /* allocate buffers for sending j and a arrays */
5183     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5184     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5185 
5186     /* create i-array of B_oth */
5187     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5188 
5189     b_othi[0] = 0;
5190     len       = 0; /* total length of j or a array to be received */
5191     k         = 0;
5192     for (i=0; i<nrecvs; i++) {
5193       rowlen = rvalues + rstarts[i]*rbs;
5194       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5195       for (j=0; j<nrows; j++) {
5196         b_othi[k+1] = b_othi[k] + rowlen[j];
5197         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5198         k++;
5199       }
5200       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5201     }
5202     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5203 
5204     /* allocate space for j and a arrrays of B_oth */
5205     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5206     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5207 
5208     /* j-array */
5209     /*---------*/
5210     /*  post receives of j-array */
5211     for (i=0; i<nrecvs; i++) {
5212       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5213       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5214     }
5215 
5216     /* pack the outgoing message j-array */
5217     k = 0;
5218     for (i=0; i<nsends; i++) {
5219       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5220       bufJ  = bufj+sstartsj[i];
5221       for (j=0; j<nrows; j++) {
5222         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5223         for (ll=0; ll<sbs; ll++) {
5224           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5225           for (l=0; l<ncols; l++) {
5226             *bufJ++ = cols[l];
5227           }
5228           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5229         }
5230       }
5231       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5232     }
5233 
5234     /* recvs and sends of j-array are completed */
5235     i = nrecvs;
5236     while (i--) {
5237       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5238     }
5239     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5240   } else if (scall == MAT_REUSE_MATRIX) {
5241     sstartsj = *startsj_s;
5242     rstartsj = *startsj_r;
5243     bufa     = *bufa_ptr;
5244     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5245     b_otha   = b_oth->a;
5246   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5247 
5248   /* a-array */
5249   /*---------*/
5250   /*  post receives of a-array */
5251   for (i=0; i<nrecvs; i++) {
5252     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5253     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5254   }
5255 
5256   /* pack the outgoing message a-array */
5257   k = 0;
5258   for (i=0; i<nsends; i++) {
5259     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5260     bufA  = bufa+sstartsj[i];
5261     for (j=0; j<nrows; j++) {
5262       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5263       for (ll=0; ll<sbs; ll++) {
5264         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5265         for (l=0; l<ncols; l++) {
5266           *bufA++ = vals[l];
5267         }
5268         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5269       }
5270     }
5271     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5272   }
5273   /* recvs and sends of a-array are completed */
5274   i = nrecvs;
5275   while (i--) {
5276     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5277   }
5278   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5279   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5280 
5281   if (scall == MAT_INITIAL_MATRIX) {
5282     /* put together the new matrix */
5283     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5284 
5285     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5286     /* Since these are PETSc arrays, change flags to free them as necessary. */
5287     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5288     b_oth->free_a  = PETSC_TRUE;
5289     b_oth->free_ij = PETSC_TRUE;
5290     b_oth->nonew   = 0;
5291 
5292     ierr = PetscFree(bufj);CHKERRQ(ierr);
5293     if (!startsj_s || !bufa_ptr) {
5294       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5295       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5296     } else {
5297       *startsj_s = sstartsj;
5298       *startsj_r = rstartsj;
5299       *bufa_ptr  = bufa;
5300     }
5301   }
5302   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5303   PetscFunctionReturn(0);
5304 }
5305 
5306 /*@C
5307   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5308 
5309   Not Collective
5310 
5311   Input Parameters:
5312 . A - The matrix in mpiaij format
5313 
5314   Output Parameter:
5315 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5316 . colmap - A map from global column index to local index into lvec
5317 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5318 
5319   Level: developer
5320 
5321 @*/
5322 #if defined(PETSC_USE_CTABLE)
5323 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5324 #else
5325 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5326 #endif
5327 {
5328   Mat_MPIAIJ *a;
5329 
5330   PetscFunctionBegin;
5331   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5332   PetscValidPointer(lvec, 2);
5333   PetscValidPointer(colmap, 3);
5334   PetscValidPointer(multScatter, 4);
5335   a = (Mat_MPIAIJ*) A->data;
5336   if (lvec) *lvec = a->lvec;
5337   if (colmap) *colmap = a->colmap;
5338   if (multScatter) *multScatter = a->Mvctx;
5339   PetscFunctionReturn(0);
5340 }
5341 
5342 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5343 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5344 #if defined(PETSC_HAVE_MKL_SPARSE)
5345 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5346 #endif
5347 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5348 #if defined(PETSC_HAVE_ELEMENTAL)
5349 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5350 #endif
5351 #if defined(PETSC_HAVE_HYPRE)
5352 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5353 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5354 #endif
5355 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5356 
5357 /*
5358     Computes (B'*A')' since computing B*A directly is untenable
5359 
5360                n                       p                          p
5361         (              )       (              )         (                  )
5362       m (      A       )  *  n (       B      )   =   m (         C        )
5363         (              )       (              )         (                  )
5364 
5365 */
5366 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5367 {
5368   PetscErrorCode ierr;
5369   Mat            At,Bt,Ct;
5370 
5371   PetscFunctionBegin;
5372   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5373   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5374   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5375   ierr = MatDestroy(&At);CHKERRQ(ierr);
5376   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5377   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5378   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5379   PetscFunctionReturn(0);
5380 }
5381 
5382 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5383 {
5384   PetscErrorCode ierr;
5385   PetscInt       m=A->rmap->n,n=B->cmap->n;
5386   Mat            Cmat;
5387 
5388   PetscFunctionBegin;
5389   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5390   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5391   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5392   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5393   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5394   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5395   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5396   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5397 
5398   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5399 
5400   *C = Cmat;
5401   PetscFunctionReturn(0);
5402 }
5403 
5404 /* ----------------------------------------------------------------*/
5405 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5406 {
5407   PetscErrorCode ierr;
5408 
5409   PetscFunctionBegin;
5410   if (scall == MAT_INITIAL_MATRIX) {
5411     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5412     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5413     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5414   }
5415   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5416   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5417   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 /*MC
5422    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5423 
5424    Options Database Keys:
5425 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5426 
5427   Level: beginner
5428 
5429 .seealso: MatCreateAIJ()
5430 M*/
5431 
5432 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5433 {
5434   Mat_MPIAIJ     *b;
5435   PetscErrorCode ierr;
5436   PetscMPIInt    size;
5437 
5438   PetscFunctionBegin;
5439   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5440 
5441   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5442   B->data       = (void*)b;
5443   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5444   B->assembled  = PETSC_FALSE;
5445   B->insertmode = NOT_SET_VALUES;
5446   b->size       = size;
5447 
5448   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5449 
5450   /* build cache for off array entries formed */
5451   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5452 
5453   b->donotstash  = PETSC_FALSE;
5454   b->colmap      = 0;
5455   b->garray      = 0;
5456   b->roworiented = PETSC_TRUE;
5457 
5458   /* stuff used for matrix vector multiply */
5459   b->lvec  = NULL;
5460   b->Mvctx = NULL;
5461 
5462   /* stuff for MatGetRow() */
5463   b->rowindices   = 0;
5464   b->rowvalues    = 0;
5465   b->getrowactive = PETSC_FALSE;
5466 
5467   /* flexible pointer used in CUSP/CUSPARSE classes */
5468   b->spptr = NULL;
5469 
5470   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5471   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5472   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5473   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5474   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5475   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5476   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5477   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5478 #if defined(PETSC_HAVE_MKL_SPARSE)
5479   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5480 #endif
5481   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5482   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5483 #if defined(PETSC_HAVE_ELEMENTAL)
5484   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5485 #endif
5486 #if defined(PETSC_HAVE_HYPRE)
5487   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5488 #endif
5489   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5490   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5491   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5492   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5493 #if defined(PETSC_HAVE_HYPRE)
5494   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5495 #endif
5496   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5497   PetscFunctionReturn(0);
5498 }
5499 
5500 /*@C
5501      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5502          and "off-diagonal" part of the matrix in CSR format.
5503 
5504    Collective on MPI_Comm
5505 
5506    Input Parameters:
5507 +  comm - MPI communicator
5508 .  m - number of local rows (Cannot be PETSC_DECIDE)
5509 .  n - This value should be the same as the local size used in creating the
5510        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5511        calculated if N is given) For square matrices n is almost always m.
5512 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5513 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5514 .   i - row indices for "diagonal" portion of matrix
5515 .   j - column indices
5516 .   a - matrix values
5517 .   oi - row indices for "off-diagonal" portion of matrix
5518 .   oj - column indices
5519 -   oa - matrix values
5520 
5521    Output Parameter:
5522 .   mat - the matrix
5523 
5524    Level: advanced
5525 
5526    Notes:
5527        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5528        must free the arrays once the matrix has been destroyed and not before.
5529 
5530        The i and j indices are 0 based
5531 
5532        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5533 
5534        This sets local rows and cannot be used to set off-processor values.
5535 
5536        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5537        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5538        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5539        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5540        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5541        communication if it is known that only local entries will be set.
5542 
5543 .keywords: matrix, aij, compressed row, sparse, parallel
5544 
5545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5546           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5547 @*/
5548 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5549 {
5550   PetscErrorCode ierr;
5551   Mat_MPIAIJ     *maij;
5552 
5553   PetscFunctionBegin;
5554   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5555   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5556   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5557   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5558   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5559   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5560   maij = (Mat_MPIAIJ*) (*mat)->data;
5561 
5562   (*mat)->preallocated = PETSC_TRUE;
5563 
5564   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5565   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5566 
5567   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5568   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5569 
5570   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5571   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5572   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5573   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5574 
5575   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5576   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5577   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5578   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5579   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5580   PetscFunctionReturn(0);
5581 }
5582 
5583 /*
5584     Special version for direct calls from Fortran
5585 */
5586 #include <petsc/private/fortranimpl.h>
5587 
5588 /* Change these macros so can be used in void function */
5589 #undef CHKERRQ
5590 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5591 #undef SETERRQ2
5592 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5593 #undef SETERRQ3
5594 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5595 #undef SETERRQ
5596 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5597 
5598 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5599 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5600 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5601 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5602 #else
5603 #endif
5604 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5605 {
5606   Mat            mat  = *mmat;
5607   PetscInt       m    = *mm, n = *mn;
5608   InsertMode     addv = *maddv;
5609   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5610   PetscScalar    value;
5611   PetscErrorCode ierr;
5612 
5613   MatCheckPreallocated(mat,1);
5614   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5615 
5616 #if defined(PETSC_USE_DEBUG)
5617   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5618 #endif
5619   {
5620     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5621     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5622     PetscBool roworiented = aij->roworiented;
5623 
5624     /* Some Variables required in the macro */
5625     Mat        A                 = aij->A;
5626     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5627     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5628     MatScalar  *aa               = a->a;
5629     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5630     Mat        B                 = aij->B;
5631     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5632     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5633     MatScalar  *ba               = b->a;
5634 
5635     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5636     PetscInt  nonew = a->nonew;
5637     MatScalar *ap1,*ap2;
5638 
5639     PetscFunctionBegin;
5640     for (i=0; i<m; i++) {
5641       if (im[i] < 0) continue;
5642 #if defined(PETSC_USE_DEBUG)
5643       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5644 #endif
5645       if (im[i] >= rstart && im[i] < rend) {
5646         row      = im[i] - rstart;
5647         lastcol1 = -1;
5648         rp1      = aj + ai[row];
5649         ap1      = aa + ai[row];
5650         rmax1    = aimax[row];
5651         nrow1    = ailen[row];
5652         low1     = 0;
5653         high1    = nrow1;
5654         lastcol2 = -1;
5655         rp2      = bj + bi[row];
5656         ap2      = ba + bi[row];
5657         rmax2    = bimax[row];
5658         nrow2    = bilen[row];
5659         low2     = 0;
5660         high2    = nrow2;
5661 
5662         for (j=0; j<n; j++) {
5663           if (roworiented) value = v[i*n+j];
5664           else value = v[i+j*m];
5665           if (in[j] >= cstart && in[j] < cend) {
5666             col = in[j] - cstart;
5667             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5668             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5669           } else if (in[j] < 0) continue;
5670 #if defined(PETSC_USE_DEBUG)
5671           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5672 #endif
5673           else {
5674             if (mat->was_assembled) {
5675               if (!aij->colmap) {
5676                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5677               }
5678 #if defined(PETSC_USE_CTABLE)
5679               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5680               col--;
5681 #else
5682               col = aij->colmap[in[j]] - 1;
5683 #endif
5684               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5685               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5686                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5687                 col  =  in[j];
5688                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5689                 B     = aij->B;
5690                 b     = (Mat_SeqAIJ*)B->data;
5691                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5692                 rp2   = bj + bi[row];
5693                 ap2   = ba + bi[row];
5694                 rmax2 = bimax[row];
5695                 nrow2 = bilen[row];
5696                 low2  = 0;
5697                 high2 = nrow2;
5698                 bm    = aij->B->rmap->n;
5699                 ba    = b->a;
5700               }
5701             } else col = in[j];
5702             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5703           }
5704         }
5705       } else if (!aij->donotstash) {
5706         if (roworiented) {
5707           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5708         } else {
5709           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5710         }
5711       }
5712     }
5713   }
5714   PetscFunctionReturnVoid();
5715 }
5716 
5717