xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1c6452426e4fbe88ea3c1fadda5eb8e5ba3540c9)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also
   automatically switches over to use inodes when enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
/*
   MatSetValues_SeqAIJ_A_Private - Puts one value into local (row,col) of the matrix named A.

   This is a statement macro, not a function.  Besides its arguments it reads and writes the
   following names, which must exist at the point of expansion:
     A, a, am, aa, ai, aj, aimax, ailen, rp1, ap1, nrow1, rmax1, low1, high1, lastcol1,
     t, _i, ii, N, nonew, ignorezeroentries
   It also defines the label a_noinsert, so it can be expanded only once per function.
   orow/ocol are used only in the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    /* keep one end of the previous search window, reset the other */ \
    if (col <= lastcol1) { \
      low1 = 0; \
    } else { \
      high1 = nrow1; \
    } \
    lastcol1 = col; \
    /* bisect until the window holds at most 5 candidates */ \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) { \
        high1 = t; \
      } else { \
        low1 = t; \
      } \
    } \
    /* scan the window; an existing entry is updated in place */ \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
        } else { \
          ap1[_i] = value; \
        } \
        goto a_noinsert; \
      } \
    } \
    /* the entry does not exist yet: decide whether it may be created */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }
460 
/*
   MatSetValues_SeqAIJ_B_Private - Puts one value into local (row,col) of the matrix named B.

   This is a statement macro, not a function.  Besides its arguments it reads and writes the
   following names, which must exist at the point of expansion:
     B, b, bm, ba, bi, bj, bimax, bilen, rp2, ap2, nrow2, rmax2, low2, high2, lastcol2,
     t, _i, ii, N, nonew, ignorezeroentries
   It also defines the label b_noinsert, so it can be expanded only once per function.
   orow/ocol are used only in the error message.

   Unlike the A variant, a zero value is skipped under ignorezeroentries without any
   row != col exception.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    /* keep one end of the previous search window, reset the other */ \
    if (col <= lastcol2) { \
      low2 = 0; \
    } else { \
      high2 = nrow2; \
    } \
    lastcol2 = col; \
    /* bisect until the window holds at most 5 candidates */ \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) { \
        high2 = t; \
      } else { \
        low2 = t; \
      } \
    } \
    /* scan the window; an existing entry is updated in place */ \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
        } else { \
          ap2[_i] = value; \
        } \
        goto b_noinsert; \
      } \
    } \
    /* the entry does not exist yet: decide whether it may be created */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added in yy until the next line, */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1116   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1117   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1118   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1119 
1120   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1129 #if defined(PETSC_HAVE_ELEMENTAL)
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1131 #endif
1132 #if defined(PETSC_HAVE_HYPRE)
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1135 #endif
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1143   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1144   PetscErrorCode ierr;
1145   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1146   int            fd;
1147   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1148   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1149   PetscScalar    *column_values;
1150   PetscInt       message_count,flowcontrolcount;
1151   FILE           *file;
1152 
1153   PetscFunctionBegin;
1154   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1155   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1156   nz   = A->nz + B->nz;
1157   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1158   if (!rank) {
1159     header[0] = MAT_FILE_CLASSID;
1160     header[1] = mat->rmap->N;
1161     header[2] = mat->cmap->N;
1162 
1163     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1164     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1165     /* get largest number of rows any processor has */
1166     rlen  = mat->rmap->n;
1167     range = mat->rmap->range;
1168     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1169   } else {
1170     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1171     rlen = mat->rmap->n;
1172   }
1173 
1174   /* load up the local row counts */
1175   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1176   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1177 
1178   /* store the row lengths to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1184       rlen = range[i+1] - range[i];
1185       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1191     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1195 
1196   /* load up the local column indices */
1197   nzmax = nz; /* th processor needs space a largest processor needs */
1198   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1199   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1200   cnt   = 0;
1201   for (i=0; i<mat->rmap->n; i++) {
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ((col = garray[B->j[j]]) > cstart) break;
1204       column_indices[cnt++] = col;
1205     }
1206     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1207     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1208   }
1209   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1210 
1211   /* store the column indices to the file */
1212   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1213   if (!rank) {
1214     MPI_Status status;
1215     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1216     for (i=1; i<size; i++) {
1217       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1218       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1219       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1220       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the local column values */
1233   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<mat->rmap->n; i++) {
1236     for (j=B->i[i]; j<B->i[i+1]; j++) {
1237       if (garray[B->j[j]] > cstart) break;
1238       column_values[cnt++] = B->a[j];
1239     }
1240     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1241     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1242   }
1243   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1244 
1245   /* store the column values to the file */
1246   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1247   if (!rank) {
1248     MPI_Status status;
1249     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1250     for (i=1; i<size; i++) {
1251       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1252       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1253       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1254       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1255       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1256     }
1257     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1258   } else {
1259     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1260     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1263   }
1264   ierr = PetscFree(column_values);CHKERRQ(ierr);
1265 
1266   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1267   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #include <petscdraw.h>
1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1273 {
1274   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1275   PetscErrorCode    ierr;
1276   PetscMPIInt       rank = aij->rank,size = aij->size;
1277   PetscBool         isdraw,iascii,isbinary;
1278   PetscViewer       sviewer;
1279   PetscViewerFormat format;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1285   if (iascii) {
1286     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1287     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1288       MatInfo   info;
1289       PetscBool inodes;
1290 
1291       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1292       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1295       if (!inodes) {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1298       } else {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       }
1302       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1309       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1312       PetscInt inodecount,inodelimit,*inodes;
1313       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1314       if (inodes) {
1315         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1316       } else {
1317         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1318       }
1319       PetscFunctionReturn(0);
1320     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1321       PetscFunctionReturn(0);
1322     }
1323   } else if (isbinary) {
1324     if (size == 1) {
1325       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1326       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1327     } else {
1328       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1329     }
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   {
1340     /* assemble the entire matrix onto first processor. */
1341     Mat        A;
1342     Mat_SeqAIJ *Aloc;
1343     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1344     MatScalar  *a;
1345 
1346     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1347     if (!rank) {
1348       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1351     }
1352     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1353     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1354     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1355     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1356     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1357 
1358     /* copy over the A part */
1359     Aloc = (Mat_SeqAIJ*)aij->A->data;
1360     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1361     row  = mat->rmap->rstart;
1362     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1363     for (i=0; i<m; i++) {
1364       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1365       row++;
1366       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1367     }
1368     aj = Aloc->j;
1369     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1370 
1371     /* copy over the B part */
1372     Aloc = (Mat_SeqAIJ*)aij->B->data;
1373     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1374     row  = mat->rmap->rstart;
1375     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1376     ct   = cols;
1377     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1378     for (i=0; i<m; i++) {
1379       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1380       row++;
1381       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1382     }
1383     ierr = PetscFree(ct);CHKERRQ(ierr);
1384     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1386     /*
1387        Everyone has to call to draw the matrix since the graphics waits are
1388        synchronized across all processors that share the PetscDraw object
1389     */
1390     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1391     if (!rank) {
1392       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1393       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1394     }
1395     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1396     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1397     ierr = MatDestroy(&A);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1403 {
1404   PetscErrorCode ierr;
1405   PetscBool      iascii,isdraw,issocket,isbinary;
1406 
1407   PetscFunctionBegin;
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1410   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1412   if (iascii || isdraw || isbinary || issocket) {
1413     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1414   }
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1419 {
1420   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1421   PetscErrorCode ierr;
1422   Vec            bb1 = 0;
1423   PetscBool      hasop;
1424 
1425   PetscFunctionBegin;
1426   if (flag == SOR_APPLY_UPPER) {
1427     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1428     PetscFunctionReturn(0);
1429   }
1430 
1431   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1432     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1433   }
1434 
1435   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1436     if (flag & SOR_ZERO_INITIAL_GUESS) {
1437       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1438       its--;
1439     }
1440 
1441     while (its--) {
1442       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444 
1445       /* update rhs: bb1 = bb - B*x */
1446       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1447       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1448 
1449       /* local sweep */
1450       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1451     }
1452   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1453     if (flag & SOR_ZERO_INITIAL_GUESS) {
1454       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1455       its--;
1456     }
1457     while (its--) {
1458       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460 
1461       /* update rhs: bb1 = bb - B*x */
1462       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1463       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1464 
1465       /* local sweep */
1466       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1467     }
1468   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1469     if (flag & SOR_ZERO_INITIAL_GUESS) {
1470       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471       its--;
1472     }
1473     while (its--) {
1474       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476 
1477       /* update rhs: bb1 = bb - B*x */
1478       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1479       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1480 
1481       /* local sweep */
1482       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1483     }
1484   } else if (flag & SOR_EISENSTAT) {
1485     Vec xx1;
1486 
1487     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1488     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1489 
1490     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492     if (!mat->diag) {
1493       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1494       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1495     }
1496     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1497     if (hasop) {
1498       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1499     } else {
1500       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1501     }
1502     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1503 
1504     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1505 
1506     /* local sweep */
1507     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1508     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1509     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1510   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1511 
1512   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1513 
1514   matin->factorerrortype = mat->A->factorerrortype;
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1519 {
1520   Mat            aA,aB,Aperm;
1521   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1522   PetscScalar    *aa,*ba;
1523   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1524   PetscSF        rowsf,sf;
1525   IS             parcolp = NULL;
1526   PetscBool      done;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1531   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1532   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1533   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1534 
1535   /* Invert row permutation to find out where my rows should go */
1536   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1537   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1538   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1539   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1540   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1541   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1542 
1543   /* Invert column permutation to find out where my columns should go */
1544   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1545   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1546   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1547   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1548   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1549   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1550   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1551 
1552   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1553   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1554   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1555 
1556   /* Find out where my gcols should go */
1557   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1558   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1562   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1563   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1567   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1568   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1569   for (i=0; i<m; i++) {
1570     PetscInt row = rdest[i],rowner;
1571     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1572     for (j=ai[i]; j<ai[i+1]; j++) {
1573       PetscInt cowner,col = cdest[aj[j]];
1574       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1575       if (rowner == cowner) dnnz[i]++;
1576       else onnz[i]++;
1577     }
1578     for (j=bi[i]; j<bi[i+1]; j++) {
1579       PetscInt cowner,col = gcdest[bj[j]];
1580       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1581       if (rowner == cowner) dnnz[i]++;
1582       else onnz[i]++;
1583     }
1584   }
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1587   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1590 
1591   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1592   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1593   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) {
1595     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1596     PetscInt j0,rowlen;
1597     rowlen = ai[i+1] - ai[i];
1598     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1599       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1600       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1601     }
1602     rowlen = bi[i+1] - bi[i];
1603     for (j0=j=0; j<rowlen; j0=j) {
1604       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1605       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1606     }
1607   }
1608   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1610   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1613   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1614   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1615   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1616   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1617   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1618   *B = Aperm;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1623 {
1624   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1629   if (ghosts) *ghosts = aij->garray;
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1634 {
1635   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1636   Mat            A    = mat->A,B = mat->B;
1637   PetscErrorCode ierr;
1638   PetscReal      isend[5],irecv[5];
1639 
1640   PetscFunctionBegin;
1641   info->block_size = 1.0;
1642   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1643 
1644   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1645   isend[3] = info->memory;  isend[4] = info->mallocs;
1646 
1647   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1648 
1649   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1650   isend[3] += info->memory;  isend[4] += info->mallocs;
1651   if (flag == MAT_LOCAL) {
1652     info->nz_used      = isend[0];
1653     info->nz_allocated = isend[1];
1654     info->nz_unneeded  = isend[2];
1655     info->memory       = isend[3];
1656     info->mallocs      = isend[4];
1657   } else if (flag == MAT_GLOBAL_MAX) {
1658     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1659 
1660     info->nz_used      = irecv[0];
1661     info->nz_allocated = irecv[1];
1662     info->nz_unneeded  = irecv[2];
1663     info->memory       = irecv[3];
1664     info->mallocs      = irecv[4];
1665   } else if (flag == MAT_GLOBAL_SUM) {
1666     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1667 
1668     info->nz_used      = irecv[0];
1669     info->nz_allocated = irecv[1];
1670     info->nz_unneeded  = irecv[2];
1671     info->memory       = irecv[3];
1672     info->mallocs      = irecv[4];
1673   }
1674   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1675   info->fill_ratio_needed = 0;
1676   info->factor_mallocs    = 0;
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1681 {
1682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   switch (op) {
1687   case MAT_NEW_NONZERO_LOCATIONS:
1688   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1689   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1690   case MAT_KEEP_NONZERO_PATTERN:
1691   case MAT_NEW_NONZERO_LOCATION_ERR:
1692   case MAT_USE_INODES:
1693   case MAT_IGNORE_ZERO_ENTRIES:
1694     MatCheckPreallocated(A,1);
1695     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1696     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1697     break;
1698   case MAT_ROW_ORIENTED:
1699     MatCheckPreallocated(A,1);
1700     a->roworiented = flg;
1701 
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1704     break;
1705   case MAT_NEW_DIAGONALS:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   case MAT_SPD:
1712     A->spd_set = PETSC_TRUE;
1713     A->spd     = flg;
1714     if (flg) {
1715       A->symmetric                  = PETSC_TRUE;
1716       A->structurally_symmetric     = PETSC_TRUE;
1717       A->symmetric_set              = PETSC_TRUE;
1718       A->structurally_symmetric_set = PETSC_TRUE;
1719     }
1720     break;
1721   case MAT_SYMMETRIC:
1722     MatCheckPreallocated(A,1);
1723     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_STRUCTURALLY_SYMMETRIC:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_HERMITIAN:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_SYMMETRY_ETERNAL:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_SUBMAT_SINGLEIS:
1738     A->submat_singleis = flg;
1739     break;
1740   case MAT_STRUCTURE_ONLY:
1741     /* The option is handled directly by MatSetOption() */
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1753   PetscErrorCode ierr;
1754   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1755   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1756   PetscInt       *cmap,*idx_p;
1757 
1758   PetscFunctionBegin;
1759   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1760   mat->getrowactive = PETSC_TRUE;
1761 
1762   if (!mat->rowvalues && (idx || v)) {
1763     /*
1764         allocate enough space to hold information from the longest row.
1765     */
1766     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1767     PetscInt   max = 1,tmp;
1768     for (i=0; i<matin->rmap->n; i++) {
1769       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1770       if (max < tmp) max = tmp;
1771     }
1772     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1773   }
1774 
1775   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1776   lrow = row - rstart;
1777 
1778   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1779   if (!v)   {pvA = 0; pvB = 0;}
1780   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1781   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1782   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1783   nztot = nzA + nzB;
1784 
1785   cmap = mat->garray;
1786   if (v  || idx) {
1787     if (nztot) {
1788       /* Sort by increasing column numbers, assuming A and B already sorted */
1789       PetscInt imark = -1;
1790       if (v) {
1791         *v = v_p = mat->rowvalues;
1792         for (i=0; i<nzB; i++) {
1793           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1794           else break;
1795         }
1796         imark = i;
1797         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1798         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1799       }
1800       if (idx) {
1801         *idx = idx_p = mat->rowindices;
1802         if (imark > -1) {
1803           for (i=0; i<imark; i++) {
1804             idx_p[i] = cmap[cworkB[i]];
1805           }
1806         } else {
1807           for (i=0; i<nzB; i++) {
1808             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1809             else break;
1810           }
1811           imark = i;
1812         }
1813         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1814         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1815       }
1816     } else {
1817       if (idx) *idx = 0;
1818       if (v)   *v   = 0;
1819     }
1820   }
1821   *nz  = nztot;
1822   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1823   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1838 {
1839   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1840   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1841   PetscErrorCode ierr;
1842   PetscInt       i,j,cstart = mat->cmap->rstart;
1843   PetscReal      sum = 0.0;
1844   MatScalar      *v;
1845 
1846   PetscFunctionBegin;
1847   if (aij->size == 1) {
1848     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1849   } else {
1850     if (type == NORM_FROBENIUS) {
1851       v = amat->a;
1852       for (i=0; i<amat->nz; i++) {
1853         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1854       }
1855       v = bmat->a;
1856       for (i=0; i<bmat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1858       }
1859       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1860       *norm = PetscSqrtReal(*norm);
1861       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1862     } else if (type == NORM_1) { /* max column norm */
1863       PetscReal *tmp,*tmp2;
1864       PetscInt  *jj,*garray = aij->garray;
1865       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1866       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1867       *norm = 0.0;
1868       v     = amat->a; jj = amat->j;
1869       for (j=0; j<amat->nz; j++) {
1870         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1871       }
1872       v = bmat->a; jj = bmat->j;
1873       for (j=0; j<bmat->nz; j++) {
1874         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1875       }
1876       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1877       for (j=0; j<mat->cmap->N; j++) {
1878         if (tmp2[j] > *norm) *norm = tmp2[j];
1879       }
1880       ierr = PetscFree(tmp);CHKERRQ(ierr);
1881       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1882       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1883     } else if (type == NORM_INFINITY) { /* max row norm */
1884       PetscReal ntemp = 0.0;
1885       for (j=0; j<aij->A->rmap->n; j++) {
1886         v   = amat->a + amat->i[j];
1887         sum = 0.0;
1888         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1889           sum += PetscAbsScalar(*v); v++;
1890         }
1891         v = bmat->a + bmat->i[j];
1892         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         if (sum > ntemp) ntemp = sum;
1896       }
1897       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1899     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1905 {
1906   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1907   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1908   PetscErrorCode ierr;
1909   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1910   PetscInt       cstart = A->cmap->rstart,ncol;
1911   Mat            B;
1912   MatScalar      *array;
1913 
1914   PetscFunctionBegin;
1915   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1916 
1917   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1918   ai = Aloc->i; aj = Aloc->j;
1919   bi = Bloc->i; bj = Bloc->j;
1920   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1921     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1922     PetscSFNode          *oloc;
1923     PETSC_UNUSED PetscSF sf;
1924 
1925     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1926     /* compute d_nnz for preallocation */
1927     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1928     for (i=0; i<ai[ma]; i++) {
1929       d_nnz[aj[i]]++;
1930       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1931     }
1932     /* compute local off-diagonal contributions */
1933     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1935     /* map those to global */
1936     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1937     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1938     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1939     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1941     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1942     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1943 
1944     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1945     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1946     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1947     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1949     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1950   } else {
1951     B    = *matout;
1952     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1953     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954   }
1955 
1956   /* copy over the A part */
1957   array = Aloc->a;
1958   row   = A->rmap->rstart;
1959   for (i=0; i<ma; i++) {
1960     ncol = ai[i+1]-ai[i];
1961     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1962     row++;
1963     array += ncol; aj += ncol;
1964   }
1965   aj = Aloc->j;
1966   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1967 
1968   /* copy over the B part */
1969   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1970   array = Bloc->a;
1971   row   = A->rmap->rstart;
1972   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1973   cols_tmp = cols;
1974   for (i=0; i<mb; i++) {
1975     ncol = bi[i+1]-bi[i];
1976     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; cols_tmp += ncol;
1979   }
1980   ierr = PetscFree(cols);CHKERRQ(ierr);
1981 
1982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1984   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1985     *matout = B;
1986   } else {
1987     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1993 {
1994   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1995   Mat            a    = aij->A,b = aij->B;
1996   PetscErrorCode ierr;
1997   PetscInt       s1,s2,s3;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2001   if (rr) {
2002     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2003     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2004     /* Overlap communication with computation. */
2005     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2006   }
2007   if (ll) {
2008     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2009     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2010     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2011   }
2012   /* scale  the diagonal block */
2013   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2014 
2015   if (rr) {
2016     /* Do a scatter end and then right scale the off-diagonal block */
2017     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2024 {
2025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2034 {
2035   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2036   Mat            a,b,c,d;
2037   PetscBool      flg;
2038   PetscErrorCode ierr;
2039 
2040   PetscFunctionBegin;
2041   a = matA->A; b = matA->B;
2042   c = matB->A; d = matB->B;
2043 
2044   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2045   if (flg) {
2046     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2047   }
2048   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2053 {
2054   PetscErrorCode ierr;
2055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2056   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2057 
2058   PetscFunctionBegin;
2059   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2060   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2061     /* because of the column compression in the off-processor part of the matrix a->B,
2062        the number of columns in a->B and b->B may be different, hence we cannot call
2063        the MatCopy() directly on the two parts. If need be, we can provide a more
2064        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2065        then copying the submatrices */
2066     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2067   } else {
2068     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2069     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2070   }
2071   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2076 {
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 /*
2085    Computes the number of nonzeros per row needed for preallocation when X and Y
2086    have different nonzero structure.
2087 */
2088 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2089 {
2090   PetscInt       i,j,k,nzx,nzy;
2091 
2092   PetscFunctionBegin;
2093   /* Set the number of nonzeros in the new matrix */
2094   for (i=0; i<m; i++) {
2095     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2096     nzx = xi[i+1] - xi[i];
2097     nzy = yi[i+1] - yi[i];
2098     nnz[i] = 0;
2099     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2100       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2101       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2102       nnz[i]++;
2103     }
2104     for (; k<nzy; k++) nnz[i]++;
2105   }
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2110 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2111 {
2112   PetscErrorCode ierr;
2113   PetscInt       m = Y->rmap->N;
2114   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2115   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2116 
2117   PetscFunctionBegin;
2118   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x    = (Mat_SeqAIJ*)xx->A->data;
2133     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2134     y    = (Mat_SeqAIJ*)yy->A->data;
2135     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2136     x    = (Mat_SeqAIJ*)xx->B->data;
2137     y    = (Mat_SeqAIJ*)yy->B->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2141   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2142     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2143   } else {
2144     Mat      B;
2145     PetscInt *nnz_d,*nnz_o;
2146     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2147     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2148     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2149     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2150     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2151     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2152     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2153     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2154     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2155     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2156     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2157     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2158     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2159     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2165 
2166 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2167 {
2168 #if defined(PETSC_USE_COMPLEX)
2169   PetscErrorCode ierr;
2170   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2174   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2175 #else
2176   PetscFunctionBegin;
2177 #endif
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2182 {
2183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2188   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2193 {
2194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2195   PetscErrorCode ierr;
2196 
2197   PetscFunctionBegin;
2198   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2199   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207   PetscInt       i,*idxb = 0;
2208   PetscScalar    *va,*vb;
2209   Vec            vtmp;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2213   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2214   if (idx) {
2215     for (i=0; i<A->rmap->n; i++) {
2216       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2217     }
2218   }
2219 
2220   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2221   if (idx) {
2222     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2223   }
2224   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2225   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2226 
2227   for (i=0; i<A->rmap->n; i++) {
2228     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2229       va[i] = vb[i];
2230       if (idx) idx[i] = a->garray[idxb[i]];
2231     }
2232   }
2233 
2234   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2235   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2236   ierr = PetscFree(idxb);CHKERRQ(ierr);
2237   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2242 {
2243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2244   PetscErrorCode ierr;
2245   PetscInt       i,*idxb = 0;
2246   PetscScalar    *va,*vb;
2247   Vec            vtmp;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2251   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2252   if (idx) {
2253     for (i=0; i<A->cmap->n; i++) {
2254       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2255     }
2256   }
2257 
2258   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2259   if (idx) {
2260     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2261   }
2262   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2263   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2264 
2265   for (i=0; i<A->rmap->n; i++) {
2266     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2267       va[i] = vb[i];
2268       if (idx) idx[i] = a->garray[idxb[i]];
2269     }
2270   }
2271 
2272   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2273   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2274   ierr = PetscFree(idxb);CHKERRQ(ierr);
2275   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2280 {
2281   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2282   PetscInt       n      = A->rmap->n;
2283   PetscInt       cstart = A->cmap->rstart;
2284   PetscInt       *cmap  = mat->garray;
2285   PetscInt       *diagIdx, *offdiagIdx;
2286   Vec            diagV, offdiagV;
2287   PetscScalar    *a, *diagA, *offdiagA;
2288   PetscInt       r;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2293   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2294   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2295   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2296   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2297   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2298   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2299   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2300   for (r = 0; r < n; ++r) {
2301     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2302       a[r]   = diagA[r];
2303       idx[r] = cstart + diagIdx[r];
2304     } else {
2305       a[r]   = offdiagA[r];
2306       idx[r] = cmap[offdiagIdx[r]];
2307     }
2308   }
2309   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2310   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2311   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2312   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2313   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2314   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2319 {
2320   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2321   PetscInt       n      = A->rmap->n;
2322   PetscInt       cstart = A->cmap->rstart;
2323   PetscInt       *cmap  = mat->garray;
2324   PetscInt       *diagIdx, *offdiagIdx;
2325   Vec            diagV, offdiagV;
2326   PetscScalar    *a, *diagA, *offdiagA;
2327   PetscInt       r;
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2332   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2333   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2334   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2335   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2337   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2338   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2339   for (r = 0; r < n; ++r) {
2340     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2341       a[r]   = diagA[r];
2342       idx[r] = cstart + diagIdx[r];
2343     } else {
2344       a[r]   = offdiagA[r];
2345       idx[r] = cmap[offdiagIdx[r]];
2346     }
2347   }
2348   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2350   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2351   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2352   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2353   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2358 {
2359   PetscErrorCode ierr;
2360   Mat            *dummy;
2361 
2362   PetscFunctionBegin;
2363   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2364   *newmat = *dummy;
2365   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2370 {
2371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2376   A->factorerrortype = a->A->factorerrortype;
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2381 {
2382   PetscErrorCode ierr;
2383   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2387   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2394 {
2395   PetscFunctionBegin;
2396   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2397   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 /*@
2402    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2403 
2404    Collective on Mat
2405 
2406    Input Parameters:
2407 +    A - the matrix
2408 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2409 
2410  Level: advanced
2411 
2412 @*/
2413 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2414 {
2415   PetscErrorCode       ierr;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2423 {
2424   PetscErrorCode       ierr;
2425   PetscBool            sc = PETSC_FALSE,flg;
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2429   ierr = PetscObjectOptionsBegin((PetscObject)A);
2430     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2431     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2432     if (flg) {
2433       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2434     }
2435   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2440 {
2441   PetscErrorCode ierr;
2442   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2443   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2444 
2445   PetscFunctionBegin;
2446   if (!Y->preallocated) {
2447     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2448   } else if (!aij->nz) {
2449     PetscInt nonew = aij->nonew;
2450     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2451     aij->nonew = nonew;
2452   }
2453   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2458 {
2459   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2464   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2465   if (d) {
2466     PetscInt rstart;
2467     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2468     *d += rstart;
2469 
2470   }
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 
2475 /* -------------------------------------------------------------------*/
2476 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2477                                        MatGetRow_MPIAIJ,
2478                                        MatRestoreRow_MPIAIJ,
2479                                        MatMult_MPIAIJ,
2480                                 /* 4*/ MatMultAdd_MPIAIJ,
2481                                        MatMultTranspose_MPIAIJ,
2482                                        MatMultTransposeAdd_MPIAIJ,
2483                                        0,
2484                                        0,
2485                                        0,
2486                                 /*10*/ 0,
2487                                        0,
2488                                        0,
2489                                        MatSOR_MPIAIJ,
2490                                        MatTranspose_MPIAIJ,
2491                                 /*15*/ MatGetInfo_MPIAIJ,
2492                                        MatEqual_MPIAIJ,
2493                                        MatGetDiagonal_MPIAIJ,
2494                                        MatDiagonalScale_MPIAIJ,
2495                                        MatNorm_MPIAIJ,
2496                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2497                                        MatAssemblyEnd_MPIAIJ,
2498                                        MatSetOption_MPIAIJ,
2499                                        MatZeroEntries_MPIAIJ,
2500                                 /*24*/ MatZeroRows_MPIAIJ,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                        0,
2505                                 /*29*/ MatSetUp_MPIAIJ,
2506                                        0,
2507                                        0,
2508                                        MatGetDiagonalBlock_MPIAIJ,
2509                                        0,
2510                                 /*34*/ MatDuplicate_MPIAIJ,
2511                                        0,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                 /*39*/ MatAXPY_MPIAIJ,
2516                                        MatCreateSubMatrices_MPIAIJ,
2517                                        MatIncreaseOverlap_MPIAIJ,
2518                                        MatGetValues_MPIAIJ,
2519                                        MatCopy_MPIAIJ,
2520                                 /*44*/ MatGetRowMax_MPIAIJ,
2521                                        MatScale_MPIAIJ,
2522                                        MatShift_MPIAIJ,
2523                                        MatDiagonalSet_MPIAIJ,
2524                                        MatZeroRowsColumns_MPIAIJ,
2525                                 /*49*/ MatSetRandom_MPIAIJ,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                        0,
2530                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2531                                        0,
2532                                        MatSetUnfactored_MPIAIJ,
2533                                        MatPermute_MPIAIJ,
2534                                        0,
2535                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2536                                        MatDestroy_MPIAIJ,
2537                                        MatView_MPIAIJ,
2538                                        0,
2539                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2541                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2546                                        MatGetRowMinAbs_MPIAIJ,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*75*/ MatFDColoringApply_AIJ,
2552                                        MatSetFromOptions_MPIAIJ,
2553                                        0,
2554                                        0,
2555                                        MatFindZeroDiagonals_MPIAIJ,
2556                                 /*80*/ 0,
2557                                        0,
2558                                        0,
2559                                 /*83*/ MatLoad_MPIAIJ,
2560                                        MatIsSymmetric_MPIAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2566                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2567                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2568                                        MatPtAP_MPIAIJ_MPIAIJ,
2569                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2570                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*99*/ 0,
2576                                        0,
2577                                        0,
2578                                        MatConjugate_MPIAIJ,
2579                                        0,
2580                                 /*104*/MatSetValuesRow_MPIAIJ,
2581                                        MatRealPart_MPIAIJ,
2582                                        MatImaginaryPart_MPIAIJ,
2583                                        0,
2584                                        0,
2585                                 /*109*/0,
2586                                        0,
2587                                        MatGetRowMin_MPIAIJ,
2588                                        0,
2589                                        MatMissingDiagonal_MPIAIJ,
2590                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2591                                        0,
2592                                        MatGetGhosts_MPIAIJ,
2593                                        0,
2594                                        0,
2595                                 /*119*/0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        MatGetMultiProcBlock_MPIAIJ,
2600                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2601                                        MatGetColumnNorms_MPIAIJ,
2602                                        MatInvertBlockDiagonal_MPIAIJ,
2603                                        0,
2604                                        MatCreateSubMatricesMPI_MPIAIJ,
2605                                 /*129*/0,
2606                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2607                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2608                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2609                                        0,
2610                                 /*134*/0,
2611                                        0,
2612                                        MatRARt_MPIAIJ_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*139*/MatSetBlockSizes_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        MatFDColoringSetUp_MPIXAIJ,
2619                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2620                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2621 };
2622 
2623 /* ----------------------------------------------------------------------------------------*/
2624 
2625 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2626 {
2627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2628   PetscErrorCode ierr;
2629 
2630   PetscFunctionBegin;
2631   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2632   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2637 {
2638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2643   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2648 {
2649   Mat_MPIAIJ     *b;
2650   PetscErrorCode ierr;
2651 
2652   PetscFunctionBegin;
2653   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2654   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2655   b = (Mat_MPIAIJ*)B->data;
2656 
2657 #if defined(PETSC_USE_CTABLE)
2658   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2659 #else
2660   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2661 #endif
2662   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2663   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2664   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2665 
2666   /* Because the B will have been resized we simply destroy it and create a new one each time */
2667   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2668   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2669   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2670   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2671   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2672   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2673 
2674   if (!B->preallocated) {
2675     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2676     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2677     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2678     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2679     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2680   }
2681 
2682   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2683   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2684   B->preallocated  = PETSC_TRUE;
2685   B->was_assembled = PETSC_FALSE;
2686   B->assembled     = PETSC_FALSE;;
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2691 {
2692   Mat_MPIAIJ     *b;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2697   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2698   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2699   b = (Mat_MPIAIJ*)B->data;
2700 
2701 #if defined(PETSC_USE_CTABLE)
2702   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2703 #else
2704   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2705 #endif
2706   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2707   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2708   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2709 
2710   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2711   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2712   B->preallocated  = PETSC_TRUE;
2713   B->was_assembled = PETSC_FALSE;
2714   B->assembled = PETSC_FALSE;
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2719 {
2720   Mat            mat;
2721   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2722   PetscErrorCode ierr;
2723 
2724   PetscFunctionBegin;
2725   *newmat = 0;
2726   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2727   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2728   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2729   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2730   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2731   a       = (Mat_MPIAIJ*)mat->data;
2732 
2733   mat->factortype   = matin->factortype;
2734   mat->assembled    = PETSC_TRUE;
2735   mat->insertmode   = NOT_SET_VALUES;
2736   mat->preallocated = PETSC_TRUE;
2737 
2738   a->size         = oldmat->size;
2739   a->rank         = oldmat->rank;
2740   a->donotstash   = oldmat->donotstash;
2741   a->roworiented  = oldmat->roworiented;
2742   a->rowindices   = 0;
2743   a->rowvalues    = 0;
2744   a->getrowactive = PETSC_FALSE;
2745 
2746   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2747   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2748 
2749   if (oldmat->colmap) {
2750 #if defined(PETSC_USE_CTABLE)
2751     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2752 #else
2753     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2754     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2755     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2756 #endif
2757   } else a->colmap = 0;
2758   if (oldmat->garray) {
2759     PetscInt len;
2760     len  = oldmat->B->cmap->n;
2761     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2762     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2763     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2764   } else a->garray = 0;
2765 
2766   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2767   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2768   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2769   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2770   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2771   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2772   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2773   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2774   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2775   *newmat = mat;
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2780 {
2781   PetscScalar    *vals,*svals;
2782   MPI_Comm       comm;
2783   PetscErrorCode ierr;
2784   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2785   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2786   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2787   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2788   PetscInt       cend,cstart,n,*rowners;
2789   int            fd;
2790   PetscInt       bs = newMat->rmap->bs;
2791 
2792   PetscFunctionBegin;
2793   /* force binary viewer to load .info file if it has not yet done so */
2794   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2795   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2798   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2801     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2802     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in every one elses and ship off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices*/
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in other processors and ship out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values*/
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 /* Not scalable because of ISAllGather() unless getting all columns. */
2982 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2983 {
2984   PetscErrorCode ierr;
2985   IS             iscol_local;
2986   PetscBool      isstride;
2987   PetscMPIInt    lisstride=0,gisstride;
2988 
2989   PetscFunctionBegin;
2990   /* check if we are grabbing all columns*/
2991   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2992 
2993   if (isstride) {
2994     PetscInt  start,len,mstart,mlen;
2995     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2996     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2997     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2998     if (mstart == start && mlen-mstart == len) lisstride = 1;
2999   }
3000 
3001   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3002   if (gisstride) {
3003     PetscInt N;
3004     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3005     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3006     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3007     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3008   } else {
3009     PetscInt cbs;
3010     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3011     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3012     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3013   }
3014 
3015   *isseq = iscol_local;
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 /*
3020  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3021  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3022 
3023  Input Parameters:
3024    mat - matrix
3025    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3026            i.e., mat->rstart <= isrow[i] < mat->rend
3027    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3028            i.e., mat->cstart <= iscol[i] < mat->cend
3029  Output Parameter:
3030    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3031    iscol_o - sequential column index set for retrieving mat->B
3032    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3033  */
3034 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3035 {
3036   PetscErrorCode ierr;
3037   Vec            x,cmap;
3038   const PetscInt *is_idx;
3039   PetscScalar    *xarray,*cmaparray;
3040   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3042   Mat            B=a->B;
3043   Vec            lvec=a->lvec,lcmap;
3044   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3045   MPI_Comm       comm;
3046   PetscMPIInt    rank;
3047   VecScatter     Mvctx;
3048 
3049   PetscFunctionBegin;
3050   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3052   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3053 
3054   //ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3055   //ierr = ISView(iscol,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3056 
3057   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3058   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3059   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3060   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3061   ierr = VecSet(lvec,-1.0);CHKERRQ(ierr);
3062 
3063   /* Get start indices */
3064   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3065   isstart -= ncols;
3066   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3067 
3068   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3069   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3070   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3071   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3072   for (i=0; i<ncols; i++) {
3073     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3074     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3075     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3076   }
3077   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3078   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3079   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3080   //ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
3081 
3082   /* Get iscol_d */
3083   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3084   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3085   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3086 
3087   /* Get isrow_d */
3088   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3089   rstart = mat->rmap->rstart;
3090   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3091   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3092   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3093   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3094 
3095   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3096   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3097   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3098 
3099   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3100 #if 0
3101   if (!a->Mvctx_mpi1) {
3102     /* a->Mvctx causes random 'count' in o-build? See src/mat/examples/tests/runex59_2 */
3103     a->Mvctx_mpi1_flg = PETSC_TRUE;
3104     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
3105   }
3106   Mvctx = a->Mvctx_mpi1;
3107 #endif
3108   Mvctx = a->Mvctx;
3109   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3110   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3111 
3112   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3113   ierr = VecSet(lcmap,-1.0);CHKERRQ(ierr);
3114   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3115   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3116 
3117   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3118   /* off-process column indices */
3119   count = 0;
3120   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3121   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3122 
3123   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3124   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3125   for (i=0; i<Bn; i++) {
3126     if (PetscRealPart(xarray[i]) > -1.0) {
3127       idx[count]     = i;                   /* local column index in off-diagonal part B */
3128       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3129       count++;
3130     }
3131   }
3132   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3133   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3134   printf("[%d] count %d, nlvec %d\n",rank,count,lvec->map->N);
3135   if (count != 6) {
3136     //if (rank == 1) {
3137     printf("[%d] lvec:\n",rank);
3138     ierr = VecView(lvec,0);CHKERRQ(ierr);
3139     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"count %d != 6",count);
3140   }
3141   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3142   /* cannot ensure iscol_o has same blocksize as iscol! */
3143 
3144   ierr = PetscFree(idx);CHKERRQ(ierr);
3145 
3146   *garray = cmap1;
3147 
3148   ierr = VecDestroy(&x);CHKERRQ(ierr);
3149   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3150   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3155 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3156 {
3157   PetscErrorCode ierr;
3158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3159   Mat            M = NULL;
3160   MPI_Comm       comm;
3161   IS             iscol_d,isrow_d,iscol_o;
3162   Mat            Asub = NULL,Bsub = NULL;
3163   PetscInt       n;
3164 
3165   PetscFunctionBegin;
3166   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3167 
3168   if (call == MAT_REUSE_MATRIX) {
3169     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3170     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3171     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3172 
3173     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3174     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3175 
3176     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3177     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3178 
3179     /* Update diagonal and off-diagonal portions of submat */
3180     asub = (Mat_MPIAIJ*)(*submat)->data;
3181     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3182     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3183     if (n) {
3184       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3185     }
3186     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3187     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3188 
3189   } else { /* call == MAT_INITIAL_MATRIX) */
3190     const PetscInt *garray;
3191     PetscInt        BsubN;
3192 
3193     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3194     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3195 
3196     /* Create local submatrices Asub and Bsub */
3197     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3198     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3199 
3200     /* Create submatrix M */
3201     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3202 
3203     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3204     asub = (Mat_MPIAIJ*)M->data;
3205 
3206     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3207     n = asub->B->cmap->N;
3208     if (BsubN > n) {
3209       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3210       const PetscInt *idx;
3211       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3212       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3213 
3214       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3215       j = 0;
3216       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3217       for (i=0; i<n; i++) {
3218         if (j >= BsubN) break;
3219         while (subgarray[i] > garray[j]) j++;
3220 
3221         if (subgarray[i] == garray[j]) {
3222           idx_new[i] = idx[j++];
3223         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3224       }
3225       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3226 
3227       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3228       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3229 
3230     } else if (BsubN < n) {
3231       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3232     }
3233 
3234     ierr = PetscFree(garray);CHKERRQ(ierr);
3235     *submat = M;
3236 
3237     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3238     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3239     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3240 
3241     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3242     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3243 
3244     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3245     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3246   }
3247   PetscFunctionReturn(0);
3248 }
3249 
3250 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3251 {
3252   PetscErrorCode ierr;
3253   IS             iscol_local=NULL,isrow_d;
3254   PetscInt       csize;
3255   PetscInt       n,i,j,start,end;
3256   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3257   MPI_Comm       comm;
3258 
3259   PetscFunctionBegin;
3260   /* If isrow has same processor distribution as mat,
3261      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3262   if (call == MAT_REUSE_MATRIX) {
3263     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3264     if (isrow_d) {
3265       sameRowDist  = PETSC_TRUE;
3266       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3267     } else {
3268       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3269       if (iscol_local) {
3270         sameRowDist  = PETSC_TRUE;
3271         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3272       }
3273     }
3274   } else {
3275     /* Check if isrow has same processor distribution as mat */
3276     sameDist[0] = PETSC_FALSE;
3277     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3278     if (!n) {
3279       sameDist[0] = PETSC_TRUE;
3280     } else {
3281       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3282       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3283       if (i >= start && j < end) {
3284         sameDist[0] = PETSC_TRUE;
3285       }
3286     }
3287 
3288     /* Check if iscol has same processor distribution as mat */
3289     sameDist[1] = PETSC_FALSE;
3290     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3291     if (!n) {
3292       sameDist[1] = PETSC_TRUE;
3293     } else {
3294       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3295       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3296       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3297     }
3298 
3299     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3300     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3301     sameRowDist = tsameDist[0];
3302   }
3303 
3304   if (sameRowDist) {
3305     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3306       /* isrow and iscol have same processor distribution as mat */
3307       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3308       PetscFunctionReturn(0);
3309     } else { /* sameRowDist */
3310       /* isrow has same processor distribution as mat */
3311       if (call == MAT_INITIAL_MATRIX) {
3312         PetscBool sorted;
3313         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3314         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3315         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3316         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3317 
3318         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3319         if (sorted) {
3320           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3321           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3322           PetscFunctionReturn(0);
3323         }
3324       } else { /* call == MAT_REUSE_MATRIX */
3325         IS    iscol_sub;
3326         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3327         if (iscol_sub) {
3328           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3329           PetscFunctionReturn(0);
3330         }
3331       }
3332     }
3333   }
3334 
3335   /* General case: iscol -> iscol_local which has global size of iscol */
3336   if (call == MAT_REUSE_MATRIX) {
3337     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3338     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3339   } else {
3340     if (!iscol_local) {
3341       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3342     }
3343   }
3344 
3345   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3346   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3347 
3348   if (call == MAT_INITIAL_MATRIX) {
3349     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3350     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3351   }
3352   PetscFunctionReturn(0);
3353 }
3354 
3355 /*@C
3356      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3357          and "off-diagonal" part of the matrix in CSR format.
3358 
3359    Collective on MPI_Comm
3360 
3361    Input Parameters:
3362 +  comm - MPI communicator
3363 .  A - "diagonal" portion of matrix
3364 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3365 -  garray - global index of B columns
3366 
3367    Output Parameter:
3368 .   mat - the matrix, with input A as its local diagonal matrix
3369    Level: advanced
3370 
3371    Notes:
3372        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3373        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3374 
3375 .seealso: MatCreateMPIAIJWithSplitArrays()
3376 @*/
3377 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3378 {
3379   PetscErrorCode ierr;
3380   Mat_MPIAIJ     *maij;
3381   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3382   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3383   PetscScalar    *oa=b->a;
3384   Mat            Bnew;
3385   PetscInt       m,n,N;
3386 
3387   PetscFunctionBegin;
3388   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3389   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3390   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3391   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3392   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3393   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3394 
3395   /* Get global columns of mat */
3396   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397 
3398   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3399   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3400   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3401   maij = (Mat_MPIAIJ*)(*mat)->data;
3402 
3403   (*mat)->preallocated = PETSC_TRUE;
3404 
3405   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3406   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3407 
3408   /* Set A as diagonal portion of *mat */
3409   maij->A = A;
3410 
3411   nz = oi[m];
3412   for (i=0; i<nz; i++) {
3413     col   = oj[i];
3414     oj[i] = garray[col];
3415   }
3416 
3417    /* Set Bnew as off-diagonal portion of *mat */
3418   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3419   bnew        = (Mat_SeqAIJ*)Bnew->data;
3420   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3421   maij->B     = Bnew;
3422 
3423   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3424 
3425   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3426   b->free_a       = PETSC_FALSE;
3427   b->free_ij      = PETSC_FALSE;
3428   ierr = MatDestroy(&B);CHKERRQ(ierr);
3429 
3430   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3431   bnew->free_a       = PETSC_TRUE;
3432   bnew->free_ij      = PETSC_TRUE;
3433 
3434   /* condense columns of maij->B */
3435   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3436   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3437   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3438   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3439   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3440   PetscFunctionReturn(0);
3441 }
3442 
3443 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3444 
3445 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3446 {
3447   PetscErrorCode ierr;
3448   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3449   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3450   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3451   Mat            M,Msub,B=a->B;
3452   MatScalar      *aa;
3453   Mat_SeqAIJ     *aij;
3454   PetscInt       *garray = a->garray,*colsub,Ncols;
3455   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3456   IS             iscol_sub,iscmap;
3457   const PetscInt *is_idx,*cmap;
3458   PetscBool      allcolumns=PETSC_FALSE;
3459   MPI_Comm       comm;
3460 
3461   PetscFunctionBegin;
3462   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3463 
3464   if (call == MAT_REUSE_MATRIX) {
3465     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3466     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3467     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3468 
3469     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3470     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3471 
3472     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3473     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3474 
3475     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3476 
3477   } else { /* call == MAT_INITIAL_MATRIX) */
3478     PetscBool flg;
3479 
3480     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3481     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3482 
3483     /* (1) iscol -> nonscalable iscol_local */
3484     /* Check for special case: each processor gets entire matrix columns */
3485     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3486     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3487     if (allcolumns) {
3488       iscol_sub = iscol_local;
3489       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3490       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3491 
3492     } else {
3493       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3494       PetscInt *idx,*cmap1,k;
3495       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3496       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3497       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3498       count = 0;
3499       k     = 0;
3500       for (i=0; i<Ncols; i++) {
3501         j = is_idx[i];
3502         if (j >= cstart && j < cend) {
3503           /* diagonal part of mat */
3504           idx[count]     = j;
3505           cmap1[count++] = i; /* column index in submat */
3506         } else if (Bn) {
3507           /* off-diagonal part of mat */
3508           if (j == garray[k]) {
3509             idx[count]     = j;
3510             cmap1[count++] = i;  /* column index in submat */
3511           } else if (j > garray[k]) {
3512             while (j > garray[k] && k < Bn-1) k++;
3513             if (j == garray[k]) {
3514               idx[count]     = j;
3515               cmap1[count++] = i; /* column index in submat */
3516             }
3517           }
3518         }
3519       }
3520       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3521 
3522       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3523       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3524       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3525 
3526       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3527     }
3528 
3529     /* (3) Create sequential Msub */
3530     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3531   }
3532 
3533   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3534   aij  = (Mat_SeqAIJ*)(Msub)->data;
3535   ii   = aij->i;
3536   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3537 
3538   /*
3539       m - number of local rows
3540       Ncols - number of columns (same on all processors)
3541       rstart - first row in new global matrix generated
3542   */
3543   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3544 
3545   if (call == MAT_INITIAL_MATRIX) {
3546     /* (4) Create parallel newmat */
3547     PetscMPIInt    rank,size;
3548     PetscInt       csize;
3549 
3550     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3551     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3552 
3553     /*
3554         Determine the number of non-zeros in the diagonal and off-diagonal
3555         portions of the matrix in order to do correct preallocation
3556     */
3557 
3558     /* first get start and end of "diagonal" columns */
3559     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3560     if (csize == PETSC_DECIDE) {
3561       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3562       if (mglobal == Ncols) { /* square matrix */
3563         nlocal = m;
3564       } else {
3565         nlocal = Ncols/size + ((Ncols % size) > rank);
3566       }
3567     } else {
3568       nlocal = csize;
3569     }
3570     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3571     rstart = rend - nlocal;
3572     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3573 
3574     /* next, compute all the lengths */
3575     jj    = aij->j;
3576     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3577     olens = dlens + m;
3578     for (i=0; i<m; i++) {
3579       jend = ii[i+1] - ii[i];
3580       olen = 0;
3581       dlen = 0;
3582       for (j=0; j<jend; j++) {
3583         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3584         else dlen++;
3585         jj++;
3586       }
3587       olens[i] = olen;
3588       dlens[i] = dlen;
3589     }
3590 
3591     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3592     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3593 
3594     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3595     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3596     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3597     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3598     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3599     ierr = PetscFree(dlens);CHKERRQ(ierr);
3600 
3601   } else { /* call == MAT_REUSE_MATRIX */
3602     M    = *newmat;
3603     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3604     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3605     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3606     /*
3607          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3608        rather than the slower MatSetValues().
3609     */
3610     M->was_assembled = PETSC_TRUE;
3611     M->assembled     = PETSC_FALSE;
3612   }
3613 
3614   /* (5) Set values of Msub to *newmat */
3615   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3616   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3617 
3618   jj   = aij->j;
3619   aa   = aij->a;
3620   for (i=0; i<m; i++) {
3621     row = rstart + i;
3622     nz  = ii[i+1] - ii[i];
3623     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3624     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3625     jj += nz; aa += nz;
3626   }
3627   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3628 
3629   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3630   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631 
3632   ierr = PetscFree(colsub);CHKERRQ(ierr);
3633 
3634   /* save Msub, iscol_sub and iscmap used in processor for next request */
3635   if (call ==  MAT_INITIAL_MATRIX) {
3636     *newmat = M;
3637     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3638     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3639 
3640     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3641     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3642 
3643     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3644     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3645 
3646     if (iscol_local) {
3647       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3648       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3649     }
3650   }
3651   PetscFunctionReturn(0);
3652 }
3653 
3654 /*
3655     Not great since it makes two copies of the submatrix, first an SeqAIJ
3656   in local and then by concatenating the local matrices the end result.
3657   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3658 
3659   Note: This requires a sequential iscol with all indices.
3660 */
3661 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3662 {
3663   PetscErrorCode ierr;
3664   PetscMPIInt    rank,size;
3665   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3666   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3667   Mat            M,Mreuse;
3668   MatScalar      *aa,*vwork;
3669   MPI_Comm       comm;
3670   Mat_SeqAIJ     *aij;
3671   PetscBool      colflag,allcolumns=PETSC_FALSE;
3672 
3673   PetscFunctionBegin;
3674   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3675   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3676   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3677 
3678   /* Check for special case: each processor gets entire matrix columns */
3679   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3680   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3681   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3682 
3683   if (call ==  MAT_REUSE_MATRIX) {
3684     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3685     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3686     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3687   } else {
3688     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3689   }
3690 
3691   /*
3692       m - number of local rows
3693       n - number of columns (same on all processors)
3694       rstart - first row in new global matrix generated
3695   */
3696   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3697   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3698   if (call == MAT_INITIAL_MATRIX) {
3699     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3700     ii  = aij->i;
3701     jj  = aij->j;
3702 
3703     /*
3704         Determine the number of non-zeros in the diagonal and off-diagonal
3705         portions of the matrix in order to do correct preallocation
3706     */
3707 
3708     /* first get start and end of "diagonal" columns */
3709     if (csize == PETSC_DECIDE) {
3710       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3711       if (mglobal == n) { /* square matrix */
3712         nlocal = m;
3713       } else {
3714         nlocal = n/size + ((n % size) > rank);
3715       }
3716     } else {
3717       nlocal = csize;
3718     }
3719     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3720     rstart = rend - nlocal;
3721     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3722 
3723     /* next, compute all the lengths */
3724     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3725     olens = dlens + m;
3726     for (i=0; i<m; i++) {
3727       jend = ii[i+1] - ii[i];
3728       olen = 0;
3729       dlen = 0;
3730       for (j=0; j<jend; j++) {
3731         if (*jj < rstart || *jj >= rend) olen++;
3732         else dlen++;
3733         jj++;
3734       }
3735       olens[i] = olen;
3736       dlens[i] = dlen;
3737     }
3738     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3739     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3740     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3741     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3742     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3743     ierr = PetscFree(dlens);CHKERRQ(ierr);
3744   } else {
3745     PetscInt ml,nl;
3746 
3747     M    = *newmat;
3748     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3749     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3750     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3751     /*
3752          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3753        rather than the slower MatSetValues().
3754     */
3755     M->was_assembled = PETSC_TRUE;
3756     M->assembled     = PETSC_FALSE;
3757   }
3758   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3759   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3760   ii   = aij->i;
3761   jj   = aij->j;
3762   aa   = aij->a;
3763   for (i=0; i<m; i++) {
3764     row   = rstart + i;
3765     nz    = ii[i+1] - ii[i];
3766     cwork = jj;     jj += nz;
3767     vwork = aa;     aa += nz;
3768     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3769   }
3770 
3771   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   *newmat = M;
3774 
3775   /* save submatrix used in processor for next request */
3776   if (call ==  MAT_INITIAL_MATRIX) {
3777     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3778     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3779   }
3780   PetscFunctionReturn(0);
3781 }
3782 
3783 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3784 {
3785   PetscInt       m,cstart, cend,j,nnz,i,d;
3786   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3787   const PetscInt *JJ;
3788   PetscScalar    *values;
3789   PetscErrorCode ierr;
3790   PetscBool      nooffprocentries;
3791 
3792   PetscFunctionBegin;
3793   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3794 
3795   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3796   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3797   m      = B->rmap->n;
3798   cstart = B->cmap->rstart;
3799   cend   = B->cmap->rend;
3800   rstart = B->rmap->rstart;
3801 
3802   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3803 
3804 #if defined(PETSC_USE_DEBUGGING)
3805   for (i=0; i<m; i++) {
3806     nnz = Ii[i+1]- Ii[i];
3807     JJ  = J + Ii[i];
3808     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3809     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3810     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3811   }
3812 #endif
3813 
3814   for (i=0; i<m; i++) {
3815     nnz     = Ii[i+1]- Ii[i];
3816     JJ      = J + Ii[i];
3817     nnz_max = PetscMax(nnz_max,nnz);
3818     d       = 0;
3819     for (j=0; j<nnz; j++) {
3820       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3821     }
3822     d_nnz[i] = d;
3823     o_nnz[i] = nnz - d;
3824   }
3825   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3826   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3827 
3828   if (v) values = (PetscScalar*)v;
3829   else {
3830     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3831   }
3832 
3833   for (i=0; i<m; i++) {
3834     ii   = i + rstart;
3835     nnz  = Ii[i+1]- Ii[i];
3836     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3837   }
3838   nooffprocentries    = B->nooffprocentries;
3839   B->nooffprocentries = PETSC_TRUE;
3840   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3841   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3842   B->nooffprocentries = nooffprocentries;
3843 
3844   if (!v) {
3845     ierr = PetscFree(values);CHKERRQ(ierr);
3846   }
3847   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850 
3851 /*@
3852    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3853    (the default parallel PETSc format).
3854 
3855    Collective on MPI_Comm
3856 
3857    Input Parameters:
3858 +  B - the matrix
3859 .  i - the indices into j for the start of each local row (starts with zero)
3860 .  j - the column indices for each local row (starts with zero)
3861 -  v - optional values in the matrix
3862 
3863    Level: developer
3864 
3865    Notes:
3866        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3867      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3868      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3869 
3870        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3871 
3872        The format which is used for the sparse matrix input, is equivalent to a
3873     row-major ordering.. i.e for the following matrix, the input data expected is
3874     as shown
3875 
3876 $        1 0 0
3877 $        2 0 3     P0
3878 $       -------
3879 $        4 5 6     P1
3880 $
3881 $     Process0 [P0]: rows_owned=[0,1]
3882 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3883 $        j =  {0,0,2}  [size = 3]
3884 $        v =  {1,2,3}  [size = 3]
3885 $
3886 $     Process1 [P1]: rows_owned=[2]
3887 $        i =  {0,3}    [size = nrow+1  = 1+1]
3888 $        j =  {0,1,2}  [size = 3]
3889 $        v =  {4,5,6}  [size = 3]
3890 
3891 .keywords: matrix, aij, compressed row, sparse, parallel
3892 
3893 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3894           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3895 @*/
3896 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3897 {
3898   PetscErrorCode ierr;
3899 
3900   PetscFunctionBegin;
3901   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3902   PetscFunctionReturn(0);
3903 }
3904 
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective on MPI_Comm
3913 
3914    Input Parameters:
3915 +  B - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The AIJ format (also called the Yale sparse matrix format or
3935    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3936    storage.  The stored row and column indices begin with zero.
3937    See Users-Manual: ch_mat for details.
3938 
3939    The parallel matrix is partitioned such that the first m0 rows belong to
3940    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3941    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3942 
3943    The DIAGONAL portion of the local submatrix of a processor can be defined
3944    as the submatrix which is obtained by extracting the part corresponding to
3945    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3946    first row that belongs to the processor, r2 is the last row belonging to
3947    this processor, and c1-c2 is the range of indices of the local part of a
3948    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3949    common case of a square matrix, the row and column ranges are the same and
3950    the DIAGONAL part is also square. The remaining portion of the local
3951    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3952 
3953    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3954 
3955    You can call MatGetInfo() to get information on how effective the preallocation was;
3956    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3957    You can also run with the option -info and look for messages with the string
3958    malloc in them to see if additional memory allocation was needed.
3959 
3960    Example usage:
3961 
3962    Consider the following 8x8 matrix with 34 non-zero values, that is
3963    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3964    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3965    as follows:
3966 
3967 .vb
3968             1  2  0  |  0  3  0  |  0  4
3969     Proc0   0  5  6  |  7  0  0  |  8  0
3970             9  0 10  | 11  0  0  | 12  0
3971     -------------------------------------
3972            13  0 14  | 15 16 17  |  0  0
3973     Proc1   0 18  0  | 19 20 21  |  0  0
3974             0  0  0  | 22 23  0  | 24  0
3975     -------------------------------------
3976     Proc2  25 26 27  |  0  0 28  | 29  0
3977            30  0  0  | 31 32 33  |  0 34
3978 .ve
3979 
3980    This can be represented as a collection of submatrices as:
3981 
3982 .vb
3983       A B C
3984       D E F
3985       G H I
3986 .ve
3987 
3988    Where the submatrices A,B,C are owned by proc0, D,E,F are
3989    owned by proc1, G,H,I are owned by proc2.
3990 
3991    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'M','N' parameters are 8,8, and have the same values on all procs.
3994 
3995    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3996    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3997    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3998    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3999    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4000    matrix, and [DF] as another SeqAIJ matrix.
4001 
4002    When d_nz, o_nz parameters are specified, d_nz storage elements are
4003    allocated for every row of the local diagonal submatrix, and o_nz
4004    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4005    One way to choose d_nz and o_nz is to use the max nonzeros per local
4006    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4007    In this case, the values of d_nz,o_nz are:
4008 .vb
4009      proc0 : dnz = 2, o_nz = 2
4010      proc1 : dnz = 3, o_nz = 2
4011      proc2 : dnz = 1, o_nz = 4
4012 .ve
4013    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4014    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4015    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4016    34 values.
4017 
4018    When d_nnz, o_nnz parameters are specified, the storage is specified
4019    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4020    In the above case the values for d_nnz,o_nnz are:
4021 .vb
4022      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4023      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4024      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4025 .ve
4026    Here the space allocated is sum of all the above values i.e 34, and
4027    hence pre-allocation is perfect.
4028 
4029    Level: intermediate
4030 
4031 .keywords: matrix, aij, compressed row, sparse, parallel
4032 
4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4034           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4035 @*/
4036 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4037 {
4038   PetscErrorCode ierr;
4039 
4040   PetscFunctionBegin;
4041   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4042   PetscValidType(B,1);
4043   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
4046 
4047 /*@
4048      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4049          CSR format the local rows.
4050 
4051    Collective on MPI_Comm
4052 
4053    Input Parameters:
4054 +  comm - MPI communicator
4055 .  m - number of local rows (Cannot be PETSC_DECIDE)
4056 .  n - This value should be the same as the local size used in creating the
4057        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4058        calculated if N is given) For square matrices n is almost always m.
4059 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4060 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
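4061 .   i - row offsets (length m+1, starting with zero): i[r] is the position in j and a of the first entry of local row r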
4062 .   j - column indices
4063 -   a - matrix values
4064 
4065    Output Parameter:
4066 .   mat - the matrix
4067 
4068    Level: intermediate
4069 
4070    Notes:
4071        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4072      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4073      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4074 
4075        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4076 
4077        The format which is used for the sparse matrix input, is equivalent to a
4078     row-major ordering.. i.e for the following matrix, the input data expected is
4079     as shown
4080 
4081 $        1 0 0
4082 $        2 0 3     P0
4083 $       -------
4084 $        4 5 6     P1
4085 $
4086 $     Process0 [P0]: rows_owned=[0,1]
4087 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4088 $        j =  {0,0,2}  [size = 3]
4089 $        v =  {1,2,3}  [size = 3]
4090 $
4091 $     Process1 [P1]: rows_owned=[2]
4092 $        i =  {0,3}    [size = nrow+1  = 1+1]
4093 $        j =  {0,1,2}  [size = 3]
4094 $        v =  {4,5,6}  [size = 3]
4095 
4096 .keywords: matrix, aij, compressed row, sparse, parallel
4097 
4098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4099           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4100 @*/
4101 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4102 {
4103   PetscErrorCode ierr;
4104 
4105   PetscFunctionBegin;
4106   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4107   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4108   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4109   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4110   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4111   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4112   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4113   PetscFunctionReturn(0);
4114 }
4115 
4116 /*@C
4117    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4118    (the default parallel PETSc format).  For good matrix assembly performance
4119    the user should preallocate the matrix storage by setting the parameters
4120    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4121    performance can be increased by more than a factor of 50.
4122 
4123    Collective on MPI_Comm
4124 
4125    Input Parameters:
4126 +  comm - MPI communicator
4127 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4128            This value should be the same as the local size used in creating the
4129            y vector for the matrix-vector product y = Ax.
4130 .  n - This value should be the same as the local size used in creating the
4131        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4132        calculated if N is given) For square matrices n is almost always m.
4133 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4134 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4135 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4136            (same value is used for all local rows)
4137 .  d_nnz - array containing the number of nonzeros in the various rows of the
4138            DIAGONAL portion of the local submatrix (possibly different for each row)
4139            or NULL, if d_nz is used to specify the nonzero structure.
4140            The size of this array is equal to the number of local rows, i.e 'm'.
4141 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4142            submatrix (same value is used for all local rows).
4143 -  o_nnz - array containing the number of nonzeros in the various rows of the
4144            OFF-DIAGONAL portion of the local submatrix (possibly different for
4145            each row) or NULL, if o_nz is used to specify the nonzero
4146            structure. The size of this array is equal to the number
4147            of local rows, i.e 'm'.
4148 
4149    Output Parameter:
4150 .  A - the matrix
4151 
4152    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4153    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4154    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4155 
4156    Notes:
4157    If the *_nnz parameter is given then the *_nz parameter is ignored
4158 
4159    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4160    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4161    storage requirements for this matrix.
4162 
4163    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4164    processor then it must be used on all processors that share the object for
4165    that argument.
4166 
4167    The user MUST specify either the local or global matrix dimensions
4168    (possibly both).
4169 
4170    The parallel matrix is partitioned across processors such that the
4171    first m0 rows belong to process 0, the next m1 rows belong to
4172    process 1, the next m2 rows belong to process 2 etc.. where
4173    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4174    values corresponding to [m x N] submatrix.
4175 
4176    The columns are logically partitioned with the n0 columns belonging
4177    to 0th partition, the next n1 columns belonging to the next
4178    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4179 
4180    The DIAGONAL portion of the local submatrix on any given processor
4181    is the submatrix corresponding to the rows and columns m,n
4182    corresponding to the given processor. i.e diagonal matrix on
4183    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4184    etc. The remaining portion of the local submatrix [m x (N-n)]
4185    constitute the OFF-DIAGONAL portion. The example below better
4186    illustrates this concept.
4187 
4188    For a square global matrix we define each processor's diagonal portion
4189    to be its local rows and the corresponding columns (a square submatrix);
4190    each processor's off-diagonal portion encompasses the remainder of the
4191    local matrix (a rectangular submatrix).
4192 
4193    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4194 
4195    When calling this routine with a single process communicator, a matrix of
4196    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4197    type of communicator, use the construction mechanism
4198 .vb
4199      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4200 .ve
4201 
4202 $     MatCreate(...,&A);
4203 $     MatSetType(A,MATMPIAIJ);
4204 $     MatSetSizes(A, m,n,M,N);
4205 $     MatMPIAIJSetPreallocation(A,...);
4206 
4207    By default, this format uses inodes (identical nodes) when possible.
4208    We search for consecutive rows with the same nonzero structure, thereby
4209    reusing matrix information to achieve increased efficiency.
4210 
4211    Options Database Keys:
4212 +  -mat_no_inode  - Do not use inodes
4213 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4214 -  -mat_aij_oneindex - Internally use indexing starting at 1
4215         rather than 0.  Note that when calling MatSetValues(),
4216         the user still MUST index entries starting at 0!
4217 
4218 
4219    Example usage:
4220 
4221    Consider the following 8x8 matrix with 34 non-zero values, that is
4222    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4223    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4224    as follows
4225 
4226 .vb
4227             1  2  0  |  0  3  0  |  0  4
4228     Proc0   0  5  6  |  7  0  0  |  8  0
4229             9  0 10  | 11  0  0  | 12  0
4230     -------------------------------------
4231            13  0 14  | 15 16 17  |  0  0
4232     Proc1   0 18  0  | 19 20 21  |  0  0
4233             0  0  0  | 22 23  0  | 24  0
4234     -------------------------------------
4235     Proc2  25 26 27  |  0  0 28  | 29  0
4236            30  0  0  | 31 32 33  |  0 34
4237 .ve
4238 
4239    This can be represented as a collection of submatrices as
4240 
4241 .vb
4242       A B C
4243       D E F
4244       G H I
4245 .ve
4246 
4247    Where the submatrices A,B,C are owned by proc0, D,E,F are
4248    owned by proc1, G,H,I are owned by proc2.
4249 
4250    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4251    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4252    The 'M','N' parameters are 8,8, and have the same values on all procs.
4253 
4254    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4255    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4256    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4257    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4258    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4259    matrix, and [DF] as another SeqAIJ matrix.
4260 
4261    When d_nz, o_nz parameters are specified, d_nz storage elements are
4262    allocated for every row of the local diagonal submatrix, and o_nz
4263    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4264    One way to choose d_nz and o_nz is to use the max nonzeros per local
4265    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4266    In this case, the values of d_nz,o_nz are
4267 .vb
4268      proc0 : d_nz = 2, o_nz = 2
4269      proc1 : d_nz = 3, o_nz = 2
4270      proc2 : d_nz = 1, o_nz = 4
4271 .ve
4272    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4273    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4274    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4275    34 values.
4276 
4277    When d_nnz, o_nnz parameters are specified, the storage is specified
4278    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4279    In the above case the values for d_nnz,o_nnz are
4280 .vb
4281      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4282      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4283      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4284 .ve
4285    Here the space allocated is sum of all the above values i.e 34, and
4286    hence pre-allocation is perfect.
4287 
4288    Level: intermediate
4289 
4290 .keywords: matrix, aij, compressed row, sparse, parallel
4291 
4292 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4293           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4294 @*/
4295 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4296 {
4297   PetscErrorCode ierr;
4298   PetscMPIInt    size;
4299 
4300   PetscFunctionBegin;
4301   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4302   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4303   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4304   if (size > 1) {
4305     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4306     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4307   } else {
4308     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4309     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4310   }
4311   PetscFunctionReturn(0);
4312 }
4313 
4314 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4315 {
4316   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4317   PetscBool      flg;
4318   PetscErrorCode ierr;
4319 
4320   PetscFunctionBegin;
4321   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4322   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4323   if (Ad)     *Ad     = a->A;
4324   if (Ao)     *Ao     = a->B;
4325   if (colmap) *colmap = a->garray;
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4330 {
4331   PetscErrorCode ierr;
4332   PetscInt       m,N,i,rstart,nnz,Ii;
4333   PetscInt       *indx;
4334   PetscScalar    *values;
4335 
4336   PetscFunctionBegin;
4337   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4338   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4339     PetscInt       *dnz,*onz,sum,bs,cbs;
4340 
4341     if (n == PETSC_DECIDE) {
4342       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4343     }
4344     /* Check sum(n) = N */
4345     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4346     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4347 
4348     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4349     rstart -= m;
4350 
4351     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4352     for (i=0; i<m; i++) {
4353       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4354       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4355       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4356     }
4357 
4358     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4359     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4360     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4361     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4362     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4363     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4364     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4365     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4366   }
4367 
4368   /* numeric phase */
4369   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4370   for (i=0; i<m; i++) {
4371     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4372     Ii   = i + rstart;
4373     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4374     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4375   }
4376   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4377   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4382 {
4383   PetscErrorCode    ierr;
4384   PetscMPIInt       rank;
4385   PetscInt          m,N,i,rstart,nnz;
4386   size_t            len;
4387   const PetscInt    *indx;
4388   PetscViewer       out;
4389   char              *name;
4390   Mat               B;
4391   const PetscScalar *values;
4392 
4393   PetscFunctionBegin;
4394   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4395   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4396   /* Should this be the type of the diagonal block of A? */
4397   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4398   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4399   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4400   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4401   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4402   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4403   for (i=0; i<m; i++) {
4404     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4405     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4406     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4407   }
4408   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4409   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4410 
4411   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4412   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4413   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4414   sprintf(name,"%s.%d",outfile,rank);
4415   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4416   ierr = PetscFree(name);CHKERRQ(ierr);
4417   ierr = MatView(B,out);CHKERRQ(ierr);
4418   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4419   ierr = MatDestroy(&B);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4424 {
4425   PetscErrorCode      ierr;
4426   Mat_Merge_SeqsToMPI *merge;
4427   PetscContainer      container;
4428 
4429   PetscFunctionBegin;
4430   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4431   if (container) {
4432     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4434     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4445     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4446     ierr = PetscFree(merge);CHKERRQ(ierr);
4447     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4448   }
4449   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #include <../src/mat/utils/freespace.h>
4454 #include <petscbt.h>
4455 
4456 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4457 {
4458   PetscErrorCode      ierr;
4459   MPI_Comm            comm;
4460   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4461   PetscMPIInt         size,rank,taga,*len_s;
4462   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4463   PetscInt            proc,m;
4464   PetscInt            **buf_ri,**buf_rj;
4465   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4466   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4467   MPI_Request         *s_waits,*r_waits;
4468   MPI_Status          *status;
4469   MatScalar           *aa=a->a;
4470   MatScalar           **abuf_r,*ba_i;
4471   Mat_Merge_SeqsToMPI *merge;
4472   PetscContainer      container;
4473 
4474   PetscFunctionBegin;
4475   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4476   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4477 
4478   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4479   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4480 
4481   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4482   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4483 
4484   bi     = merge->bi;
4485   bj     = merge->bj;
4486   buf_ri = merge->buf_ri;
4487   buf_rj = merge->buf_rj;
4488 
4489   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4490   owners = merge->rowmap->range;
4491   len_s  = merge->len_s;
4492 
4493   /* send and recv matrix values */
4494   /*-----------------------------*/
4495   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4496   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4497 
4498   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4499   for (proc=0,k=0; proc<size; proc++) {
4500     if (!len_s[proc]) continue;
4501     i    = owners[proc];
4502     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4503     k++;
4504   }
4505 
4506   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4507   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4508   ierr = PetscFree(status);CHKERRQ(ierr);
4509 
4510   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4511   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4512 
4513   /* insert mat values of mpimat */
4514   /*----------------------------*/
4515   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4516   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4517 
4518   for (k=0; k<merge->nrecv; k++) {
4519     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4520     nrows       = *(buf_ri_k[k]);
4521     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4522     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4523   }
4524 
4525   /* set values of ba */
4526   m = merge->rowmap->n;
4527   for (i=0; i<m; i++) {
4528     arow = owners[rank] + i;
4529     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4530     bnzi = bi[i+1] - bi[i];
4531     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4532 
4533     /* add local non-zero vals of this proc's seqmat into ba */
4534     anzi   = ai[arow+1] - ai[arow];
4535     aj     = a->j + ai[arow];
4536     aa     = a->a + ai[arow];
4537     nextaj = 0;
4538     for (j=0; nextaj<anzi; j++) {
4539       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4540         ba_i[j] += aa[nextaj++];
4541       }
4542     }
4543 
4544     /* add received vals into ba */
4545     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4546       /* i-th row */
4547       if (i == *nextrow[k]) {
4548         anzi   = *(nextai[k]+1) - *nextai[k];
4549         aj     = buf_rj[k] + *(nextai[k]);
4550         aa     = abuf_r[k] + *(nextai[k]);
4551         nextaj = 0;
4552         for (j=0; nextaj<anzi; j++) {
4553           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4554             ba_i[j] += aa[nextaj++];
4555           }
4556         }
4557         nextrow[k]++; nextai[k]++;
4558       }
4559     }
4560     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4561   }
4562   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4563   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4564 
4565   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4566   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4567   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4568   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4569   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4570   PetscFunctionReturn(0);
4571 }
4572 
4573 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4574 {
4575   PetscErrorCode      ierr;
4576   Mat                 B_mpi;
4577   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4578   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4579   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4580   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4581   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4582   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4583   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4584   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4585   MPI_Status          *status;
4586   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4587   PetscBT             lnkbt;
4588   Mat_Merge_SeqsToMPI *merge;
4589   PetscContainer      container;
4590 
4591   PetscFunctionBegin;
4592   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4593 
4594   /* make sure it is a PETSc comm */
4595   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4596   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4597   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4598 
4599   ierr = PetscNew(&merge);CHKERRQ(ierr);
4600   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4601 
4602   /* determine row ownership */
4603   /*---------------------------------------------------------*/
4604   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4605   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4606   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4608   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4609   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4610   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4611 
4612   m      = merge->rowmap->n;
4613   owners = merge->rowmap->range;
4614 
4615   /* determine the number of messages to send, their lengths */
4616   /*---------------------------------------------------------*/
4617   len_s = merge->len_s;
4618 
4619   len          = 0; /* length of buf_si[] */
4620   merge->nsend = 0;
4621   for (proc=0; proc<size; proc++) {
4622     len_si[proc] = 0;
4623     if (proc == rank) {
4624       len_s[proc] = 0;
4625     } else {
4626       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4627       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4628     }
4629     if (len_s[proc]) {
4630       merge->nsend++;
4631       nrows = 0;
4632       for (i=owners[proc]; i<owners[proc+1]; i++) {
4633         if (ai[i+1] > ai[i]) nrows++;
4634       }
4635       len_si[proc] = 2*(nrows+1);
4636       len         += len_si[proc];
4637     }
4638   }
4639 
4640   /* determine the number and length of messages to receive for ij-structure */
4641   /*-------------------------------------------------------------------------*/
4642   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4643   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4644 
4645   /* post the Irecv of j-structure */
4646   /*-------------------------------*/
4647   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4648   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4649 
4650   /* post the Isend of j-structure */
4651   /*--------------------------------*/
4652   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4653 
4654   for (proc=0, k=0; proc<size; proc++) {
4655     if (!len_s[proc]) continue;
4656     i    = owners[proc];
4657     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4658     k++;
4659   }
4660 
4661   /* receives and sends of j-structure are complete */
4662   /*------------------------------------------------*/
4663   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4664   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4665 
4666   /* send and recv i-structure */
4667   /*---------------------------*/
4668   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4669   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4670 
4671   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4672   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4673   for (proc=0,k=0; proc<size; proc++) {
4674     if (!len_s[proc]) continue;
4675     /* form outgoing message for i-structure:
4676          buf_si[0]:                 nrows to be sent
4677                [1:nrows]:           row index (global)
4678                [nrows+1:2*nrows+1]: i-structure index
4679     */
4680     /*-------------------------------------------*/
4681     nrows       = len_si[proc]/2 - 1;
4682     buf_si_i    = buf_si + nrows+1;
4683     buf_si[0]   = nrows;
4684     buf_si_i[0] = 0;
4685     nrows       = 0;
4686     for (i=owners[proc]; i<owners[proc+1]; i++) {
4687       anzi = ai[i+1] - ai[i];
4688       if (anzi) {
4689         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4690         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4691         nrows++;
4692       }
4693     }
4694     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4695     k++;
4696     buf_si += len_si[proc];
4697   }
4698 
4699   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4700   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4701 
4702   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4703   for (i=0; i<merge->nrecv; i++) {
4704     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4705   }
4706 
4707   ierr = PetscFree(len_si);CHKERRQ(ierr);
4708   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4709   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4710   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4711   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4712   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4713   ierr = PetscFree(status);CHKERRQ(ierr);
4714 
4715   /* compute a local seq matrix in each processor */
4716   /*----------------------------------------------*/
4717   /* allocate bi array and free space for accumulating nonzero column info */
4718   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4719   bi[0] = 0;
4720 
4721   /* create and initialize a linked list */
4722   nlnk = N+1;
4723   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4724 
4725   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4726   len  = ai[owners[rank+1]] - ai[owners[rank]];
4727   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4728 
4729   current_space = free_space;
4730 
4731   /* determine symbolic info for each local row */
4732   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4733 
4734   for (k=0; k<merge->nrecv; k++) {
4735     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4736     nrows       = *buf_ri_k[k];
4737     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4738     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4739   }
4740 
4741   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4742   len  = 0;
4743   for (i=0; i<m; i++) {
4744     bnzi = 0;
4745     /* add local non-zero cols of this proc's seqmat into lnk */
4746     arow  = owners[rank] + i;
4747     anzi  = ai[arow+1] - ai[arow];
4748     aj    = a->j + ai[arow];
4749     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4750     bnzi += nlnk;
4751     /* add received col data into lnk */
4752     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4753       if (i == *nextrow[k]) { /* i-th row */
4754         anzi  = *(nextai[k]+1) - *nextai[k];
4755         aj    = buf_rj[k] + *nextai[k];
4756         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4757         bnzi += nlnk;
4758         nextrow[k]++; nextai[k]++;
4759       }
4760     }
4761     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4762 
4763     /* if free space is not available, make more free space */
4764     if (current_space->local_remaining<bnzi) {
4765       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4766       nspacedouble++;
4767     }
4768     /* copy data into free space, then initialize lnk */
4769     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4770     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4771 
4772     current_space->array           += bnzi;
4773     current_space->local_used      += bnzi;
4774     current_space->local_remaining -= bnzi;
4775 
4776     bi[i+1] = bi[i] + bnzi;
4777   }
4778 
4779   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4780 
4781   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4782   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4783   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4784 
4785   /* create symbolic parallel matrix B_mpi */
4786   /*---------------------------------------*/
4787   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4788   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4789   if (n==PETSC_DECIDE) {
4790     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4791   } else {
4792     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4793   }
4794   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4795   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4796   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4797   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4798   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4799 
4800   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4801   B_mpi->assembled    = PETSC_FALSE;
4802   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4803   merge->bi           = bi;
4804   merge->bj           = bj;
4805   merge->buf_ri       = buf_ri;
4806   merge->buf_rj       = buf_rj;
4807   merge->coi          = NULL;
4808   merge->coj          = NULL;
4809   merge->owners_co    = NULL;
4810 
4811   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4812 
4813   /* attach the supporting struct to B_mpi for reuse */
4814   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4815   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4816   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4817   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4818   *mpimat = B_mpi;
4819 
4820   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4821   PetscFunctionReturn(0);
4822 }
4823 
4824 /*@C
4825       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4826                  matrices from each processor
4827 
4828     Collective on MPI_Comm
4829 
4830    Input Parameters:
4831 +    comm - the communicators the parallel matrix will live on
4832 .    seqmat - the input sequential matrices
4833 .    m - number of local rows (or PETSC_DECIDE)
4834 .    n - number of local columns (or PETSC_DECIDE)
4835 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4836 
4837    Output Parameter:
4838 .    mpimat - the parallel matrix generated
4839 
4840     Level: advanced
4841 
4842    Notes:
4843      The dimensions of the sequential matrix in each processor MUST be the same.
4844      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4845      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4846 @*/
4847 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4848 {
4849   PetscErrorCode ierr;
4850   PetscMPIInt    size;
4851 
4852   PetscFunctionBegin;
4853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4854   if (size == 1) {
4855     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4856     if (scall == MAT_INITIAL_MATRIX) {
4857       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4858     } else {
4859       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4860     }
4861     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4862     PetscFunctionReturn(0);
4863   }
4864   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4865   if (scall == MAT_INITIAL_MATRIX) {
4866     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4867   }
4868   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4869   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4870   PetscFunctionReturn(0);
4871 }
4872 
4873 /*@
4874      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4875           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4876           with MatGetSize()
4877 
4878     Not Collective
4879 
4880    Input Parameters:
4881 +    A - the matrix
4882 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4883 
4884    Output Parameter:
4885 .    A_loc - the local sequential matrix generated
4886 
4887     Level: developer
4888 
4889 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4890 
4891 @*/
4892 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4893 {
4894   PetscErrorCode ierr;
4895   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4896   Mat_SeqAIJ     *mat,*a,*b;
4897   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4898   MatScalar      *aa,*ba,*cam;
4899   PetscScalar    *ca;
4900   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4901   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4902   PetscBool      match;
4903   MPI_Comm       comm;
4904   PetscMPIInt    size;
4905 
4906   PetscFunctionBegin;
4907   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4908   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4909   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4910   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4911   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4912 
4913   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4914   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4915   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4916   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4917   aa = a->a; ba = b->a;
4918   if (scall == MAT_INITIAL_MATRIX) {
4919     if (size == 1) {
4920       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4921       PetscFunctionReturn(0);
4922     }
4923 
4924     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4925     ci[0] = 0;
4926     for (i=0; i<am; i++) {
4927       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4928     }
4929     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4930     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4931     k    = 0;
4932     for (i=0; i<am; i++) {
4933       ncols_o = bi[i+1] - bi[i];
4934       ncols_d = ai[i+1] - ai[i];
4935       /* off-diagonal portion of A */
4936       for (jo=0; jo<ncols_o; jo++) {
4937         col = cmap[*bj];
4938         if (col >= cstart) break;
4939         cj[k]   = col; bj++;
4940         ca[k++] = *ba++;
4941       }
4942       /* diagonal portion of A */
4943       for (j=0; j<ncols_d; j++) {
4944         cj[k]   = cstart + *aj++;
4945         ca[k++] = *aa++;
4946       }
4947       /* off-diagonal portion of A */
4948       for (j=jo; j<ncols_o; j++) {
4949         cj[k]   = cmap[*bj++];
4950         ca[k++] = *ba++;
4951       }
4952     }
4953     /* put together the new matrix */
4954     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4955     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4956     /* Since these are PETSc arrays, change flags to free them as necessary. */
4957     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4958     mat->free_a  = PETSC_TRUE;
4959     mat->free_ij = PETSC_TRUE;
4960     mat->nonew   = 0;
4961   } else if (scall == MAT_REUSE_MATRIX) {
4962     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4963     ci = mat->i; cj = mat->j; cam = mat->a;
4964     for (i=0; i<am; i++) {
4965       /* off-diagonal portion of A */
4966       ncols_o = bi[i+1] - bi[i];
4967       for (jo=0; jo<ncols_o; jo++) {
4968         col = cmap[*bj];
4969         if (col >= cstart) break;
4970         *cam++ = *ba++; bj++;
4971       }
4972       /* diagonal portion of A */
4973       ncols_d = ai[i+1] - ai[i];
4974       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4975       /* off-diagonal portion of A */
4976       for (j=jo; j<ncols_o; j++) {
4977         *cam++ = *ba++; bj++;
4978       }
4979     }
4980   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4981   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4982   PetscFunctionReturn(0);
4983 }
4984 
4985 /*@C
4986      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4987 
4988     Not Collective
4989 
4990    Input Parameters:
4991 +    A - the matrix
4992 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4993 -    row, col - index sets of rows and columns to extract (or NULL)
4994 
4995    Output Parameter:
4996 .    A_loc - the local sequential matrix generated
4997 
4998     Level: developer
4999 
5000 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5001 
5002 @*/
5003 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5004 {
5005   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5006   PetscErrorCode ierr;
5007   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5008   IS             isrowa,iscola;
5009   Mat            *aloc;
5010   PetscBool      match;
5011 
5012   PetscFunctionBegin;
5013   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5014   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5015   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5016   if (!row) {
5017     start = A->rmap->rstart; end = A->rmap->rend;
5018     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5019   } else {
5020     isrowa = *row;
5021   }
5022   if (!col) {
5023     start = A->cmap->rstart;
5024     cmap  = a->garray;
5025     nzA   = a->A->cmap->n;
5026     nzB   = a->B->cmap->n;
5027     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5028     ncols = 0;
5029     for (i=0; i<nzB; i++) {
5030       if (cmap[i] < start) idx[ncols++] = cmap[i];
5031       else break;
5032     }
5033     imark = i;
5034     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5035     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5036     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5037   } else {
5038     iscola = *col;
5039   }
5040   if (scall != MAT_INITIAL_MATRIX) {
5041     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5042     aloc[0] = *A_loc;
5043   }
5044   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5045   *A_loc = aloc[0];
5046   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5047   if (!row) {
5048     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5049   }
5050   if (!col) {
5051     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5052   }
5053   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5054   PetscFunctionReturn(0);
5055 }
5056 
5057 /*@C
5058     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5059 
5060     Collective on Mat
5061 
5062    Input Parameters:
5063 +    A,B - the matrices in mpiaij format
5064 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5065 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5066 
5067    Output Parameter:
5068 +    rowb, colb - index sets of rows and columns of B to extract
5069 -    B_seq - the sequential matrix generated
5070 
5071     Level: developer
5072 
5073 @*/
5074 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5075 {
5076   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5077   PetscErrorCode ierr;
5078   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5079   IS             isrowb,iscolb;
5080   Mat            *bseq=NULL;
5081 
5082   PetscFunctionBegin;
5083   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5084     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5085   }
5086   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5087 
5088   if (scall == MAT_INITIAL_MATRIX) {
5089     start = A->cmap->rstart;
5090     cmap  = a->garray;
5091     nzA   = a->A->cmap->n;
5092     nzB   = a->B->cmap->n;
5093     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5094     ncols = 0;
5095     for (i=0; i<nzB; i++) {  /* row < local row index */
5096       if (cmap[i] < start) idx[ncols++] = cmap[i];
5097       else break;
5098     }
5099     imark = i;
5100     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5101     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5102     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5103     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5104   } else {
5105     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5106     isrowb  = *rowb; iscolb = *colb;
5107     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5108     bseq[0] = *B_seq;
5109   }
5110   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5111   *B_seq = bseq[0];
5112   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5113   if (!rowb) {
5114     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5115   } else {
5116     *rowb = isrowb;
5117   }
5118   if (!colb) {
5119     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5120   } else {
5121     *colb = iscolb;
5122   }
5123   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5124   PetscFunctionReturn(0);
5125 }
5126 
5127 /*
5128     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5129     of the OFF-DIAGONAL portion of local A
5130 
5131     Collective on Mat
5132 
5133    Input Parameters:
5134 +    A,B - the matrices in mpiaij format
5135 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5136 
5137    Output Parameter:
5138 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5139 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5140 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5141 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5142 
5143     Level: developer
5144 
5145 */
5146 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5147 {
5148   VecScatter_MPI_General *gen_to,*gen_from;
5149   PetscErrorCode         ierr;
5150   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5151   Mat_SeqAIJ             *b_oth;
5152   VecScatter             ctx;
5153   MPI_Comm               comm;
5154   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5155   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5156   PetscInt               *rvalues,*svalues;
5157   MatScalar              *b_otha,*bufa,*bufA;
5158   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5159   MPI_Request            *rwaits = NULL,*swaits = NULL;
5160   MPI_Status             *sstatus,rstatus;
5161   PetscMPIInt            jj,size;
5162   PetscInt               *cols,sbs,rbs;
5163   PetscScalar            *vals;
5164 
5165   PetscFunctionBegin;
5166   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5167   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5168 
5169   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5170     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5171   }
5172   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5173   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5174 
5175   if (size == 1) {
5176     startsj_s = NULL;
5177     bufa_ptr  = NULL;
5178     *B_oth    = NULL;
5179     PetscFunctionReturn(0);
5180   }
5181 
5182   if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */
5183     a->Mvctx_mpi1_flg = PETSC_TRUE;
5184     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5185   }
5186   ctx = a->Mvctx_mpi1;
5187   tag = ((PetscObject)ctx)->tag;
5188 
5189   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5190   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5191   nrecvs   = gen_from->n;
5192   nsends   = gen_to->n;
5193 
5194   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5195   srow    = gen_to->indices;    /* local row index to be sent */
5196   sstarts = gen_to->starts;
5197   sprocs  = gen_to->procs;
5198   sstatus = gen_to->sstatus;
5199   sbs     = gen_to->bs;
5200   rstarts = gen_from->starts;
5201   rprocs  = gen_from->procs;
5202   rbs     = gen_from->bs;
5203 
5204   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5205   if (scall == MAT_INITIAL_MATRIX) {
5206     /* i-array */
5207     /*---------*/
5208     /*  post receives */
5209     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5210     for (i=0; i<nrecvs; i++) {
5211       rowlen = rvalues + rstarts[i]*rbs;
5212       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5213       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5214     }
5215 
5216     /* pack the outgoing message */
5217     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5218 
5219     sstartsj[0] = 0;
5220     rstartsj[0] = 0;
5221     len         = 0; /* total length of j or a array to be sent */
5222     k           = 0;
5223     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5224     for (i=0; i<nsends; i++) {
5225       rowlen = svalues + sstarts[i]*sbs;
5226       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5227       for (j=0; j<nrows; j++) {
5228         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5229         for (l=0; l<sbs; l++) {
5230           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5231 
5232           rowlen[j*sbs+l] = ncols;
5233 
5234           len += ncols;
5235           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5236         }
5237         k++;
5238       }
5239       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5240 
5241       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5242     }
5243     /* recvs and sends of i-array are completed */
5244     i = nrecvs;
5245     while (i--) {
5246       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5247     }
5248     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5249     ierr = PetscFree(svalues);CHKERRQ(ierr);
5250 
5251     /* allocate buffers for sending j and a arrays */
5252     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5253     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5254 
5255     /* create i-array of B_oth */
5256     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5257 
5258     b_othi[0] = 0;
5259     len       = 0; /* total length of j or a array to be received */
5260     k         = 0;
5261     for (i=0; i<nrecvs; i++) {
5262       rowlen = rvalues + rstarts[i]*rbs;
5263       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5264       for (j=0; j<nrows; j++) {
5265         b_othi[k+1] = b_othi[k] + rowlen[j];
5266         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5267         k++;
5268       }
5269       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5270     }
5271     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5272 
5273     /* allocate space for j and a arrrays of B_oth */
5274     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5275     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5276 
5277     /* j-array */
5278     /*---------*/
5279     /*  post receives of j-array */
5280     for (i=0; i<nrecvs; i++) {
5281       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5282       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5283     }
5284 
5285     /* pack the outgoing message j-array */
5286     k = 0;
5287     for (i=0; i<nsends; i++) {
5288       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5289       bufJ  = bufj+sstartsj[i];
5290       for (j=0; j<nrows; j++) {
5291         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5292         for (ll=0; ll<sbs; ll++) {
5293           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5294           for (l=0; l<ncols; l++) {
5295             *bufJ++ = cols[l];
5296           }
5297           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5298         }
5299       }
5300       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5301     }
5302 
5303     /* recvs and sends of j-array are completed */
5304     i = nrecvs;
5305     while (i--) {
5306       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5307     }
5308     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5309   } else if (scall == MAT_REUSE_MATRIX) {
5310     sstartsj = *startsj_s;
5311     rstartsj = *startsj_r;
5312     bufa     = *bufa_ptr;
5313     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5314     b_otha   = b_oth->a;
5315   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5316 
5317   /* a-array */
5318   /*---------*/
5319   /*  post receives of a-array */
5320   for (i=0; i<nrecvs; i++) {
5321     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5322     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5323   }
5324 
5325   /* pack the outgoing message a-array */
5326   k = 0;
5327   for (i=0; i<nsends; i++) {
5328     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5329     bufA  = bufa+sstartsj[i];
5330     for (j=0; j<nrows; j++) {
5331       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5332       for (ll=0; ll<sbs; ll++) {
5333         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5334         for (l=0; l<ncols; l++) {
5335           *bufA++ = vals[l];
5336         }
5337         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5338       }
5339     }
5340     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5341   }
5342   /* recvs and sends of a-array are completed */
5343   i = nrecvs;
5344   while (i--) {
5345     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5346   }
5347   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5348   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5349 
5350   if (scall == MAT_INITIAL_MATRIX) {
5351     /* put together the new matrix */
5352     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5353 
5354     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5355     /* Since these are PETSc arrays, change flags to free them as necessary. */
5356     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5357     b_oth->free_a  = PETSC_TRUE;
5358     b_oth->free_ij = PETSC_TRUE;
5359     b_oth->nonew   = 0;
5360 
5361     ierr = PetscFree(bufj);CHKERRQ(ierr);
5362     if (!startsj_s || !bufa_ptr) {
5363       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5364       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5365     } else {
5366       *startsj_s = sstartsj;
5367       *startsj_r = rstartsj;
5368       *bufa_ptr  = bufa;
5369     }
5370   }
5371   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5372   PetscFunctionReturn(0);
5373 }
5374 
5375 /*@C
5376   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5377 
5378   Not Collective
5379 
5380   Input Parameters:
5381 . A - The matrix in mpiaij format
5382 
5383   Output Parameter:
5384 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5385 . colmap - A map from global column index to local index into lvec
5386 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5387 
5388   Level: developer
5389 
5390 @*/
5391 #if defined(PETSC_USE_CTABLE)
5392 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5393 #else
5394 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5395 #endif
5396 {
5397   Mat_MPIAIJ *a;
5398 
5399   PetscFunctionBegin;
5400   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5401   PetscValidPointer(lvec, 2);
5402   PetscValidPointer(colmap, 3);
5403   PetscValidPointer(multScatter, 4);
5404   a = (Mat_MPIAIJ*) A->data;
5405   if (lvec) *lvec = a->lvec;
5406   if (colmap) *colmap = a->colmap;
5407   if (multScatter) *multScatter = a->Mvctx;
5408   PetscFunctionReturn(0);
5409 }
5410 
5411 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5412 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5413 #if defined(PETSC_HAVE_MKL_SPARSE)
5414 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5415 #endif
5416 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5417 #if defined(PETSC_HAVE_ELEMENTAL)
5418 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5419 #endif
5420 #if defined(PETSC_HAVE_HYPRE)
5421 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5422 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5423 #endif
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5426 
5427 /*
5428     Computes (B'*A')' since computing B*A directly is untenable
5429 
5430                n                       p                          p
5431         (              )       (              )         (                  )
5432       m (      A       )  *  n (       B      )   =   m (         C        )
5433         (              )       (              )         (                  )
5434 
5435 */
5436 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5437 {
5438   PetscErrorCode ierr;
5439   Mat            At,Bt,Ct;
5440 
5441   PetscFunctionBegin;
5442   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5443   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5444   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5445   ierr = MatDestroy(&At);CHKERRQ(ierr);
5446   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5447   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5448   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5449   PetscFunctionReturn(0);
5450 }
5451 
5452 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5453 {
5454   PetscErrorCode ierr;
5455   PetscInt       m=A->rmap->n,n=B->cmap->n;
5456   Mat            Cmat;
5457 
5458   PetscFunctionBegin;
5459   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5460   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5461   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5462   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5463   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5464   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5465   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5466   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5467 
5468   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5469 
5470   *C = Cmat;
5471   PetscFunctionReturn(0);
5472 }
5473 
5474 /* ----------------------------------------------------------------*/
5475 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5476 {
5477   PetscErrorCode ierr;
5478 
5479   PetscFunctionBegin;
5480   if (scall == MAT_INITIAL_MATRIX) {
5481     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5482     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5483     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5484   }
5485   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5486   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5487   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5488   PetscFunctionReturn(0);
5489 }
5490 
5491 /*MC
5492    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5493 
5494    Options Database Keys:
5495 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5496 
5497   Level: beginner
5498 
5499 .seealso: MatCreateAIJ()
5500 M*/
5501 
5502 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5503 {
5504   Mat_MPIAIJ     *b;
5505   PetscErrorCode ierr;
5506   PetscMPIInt    size;
5507 
5508   PetscFunctionBegin;
5509   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5510 
5511   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5512   B->data       = (void*)b;
5513   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5514   B->assembled  = PETSC_FALSE;
5515   B->insertmode = NOT_SET_VALUES;
5516   b->size       = size;
5517 
5518   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5519 
5520   /* build cache for off array entries formed */
5521   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5522 
5523   b->donotstash  = PETSC_FALSE;
5524   b->colmap      = 0;
5525   b->garray      = 0;
5526   b->roworiented = PETSC_TRUE;
5527 
5528   /* stuff used for matrix vector multiply */
5529   b->lvec  = NULL;
5530   b->Mvctx = NULL;
5531 
5532   /* stuff for MatGetRow() */
5533   b->rowindices   = 0;
5534   b->rowvalues    = 0;
5535   b->getrowactive = PETSC_FALSE;
5536 
5537   /* flexible pointer used in CUSP/CUSPARSE classes */
5538   b->spptr = NULL;
5539 
5540   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5549 #if defined(PETSC_HAVE_MKL_SPARSE)
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5551 #endif
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5554 #if defined(PETSC_HAVE_ELEMENTAL)
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5556 #endif
5557 #if defined(PETSC_HAVE_HYPRE)
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5559 #endif
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5563   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5565 #if defined(PETSC_HAVE_HYPRE)
5566   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5567 #endif
5568   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5569   PetscFunctionReturn(0);
5570 }
5571 
5572 /*@C
5573      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5574          and "off-diagonal" part of the matrix in CSR format.
5575 
5576    Collective on MPI_Comm
5577 
5578    Input Parameters:
5579 +  comm - MPI communicator
5580 .  m - number of local rows (Cannot be PETSC_DECIDE)
5581 .  n - This value should be the same as the local size used in creating the
5582        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5583        calculated if N is given) For square matrices n is almost always m.
5584 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5585 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5586 .   i - row indices for "diagonal" portion of matrix
5587 .   j - column indices
5588 .   a - matrix values
5589 .   oi - row indices for "off-diagonal" portion of matrix
5590 .   oj - column indices
5591 -   oa - matrix values
5592 
5593    Output Parameter:
5594 .   mat - the matrix
5595 
5596    Level: advanced
5597 
5598    Notes:
5599        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5600        must free the arrays once the matrix has been destroyed and not before.
5601 
5602        The i and j indices are 0 based
5603 
5604        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5605 
5606        This sets local rows and cannot be used to set off-processor values.
5607 
5608        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5609        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5610        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5611        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5612        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5613        communication if it is known that only local entries will be set.
5614 
5615 .keywords: matrix, aij, compressed row, sparse, parallel
5616 
5617 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5618           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5619 @*/
5620 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5621 {
5622   PetscErrorCode ierr;
5623   Mat_MPIAIJ     *maij;
5624 
5625   PetscFunctionBegin;
5626   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5627   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5628   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5629   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5630   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5631   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5632   maij = (Mat_MPIAIJ*) (*mat)->data;
5633 
5634   (*mat)->preallocated = PETSC_TRUE;
5635 
5636   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5637   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5638 
5639   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5640   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5641 
5642   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5643   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5644   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5645   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5646 
5647   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5648   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5650   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5651   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5652   PetscFunctionReturn(0);
5653 }
5654 
5655 /*
5656     Special version for direct calls from Fortran
5657 */
5658 #include <petsc/private/fortranimpl.h>
5659 
5660 /* Change these macros so can be used in void function */
5661 #undef CHKERRQ
5662 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5663 #undef SETERRQ2
5664 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5665 #undef SETERRQ3
5666 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5667 #undef SETERRQ
5668 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5669 
5670 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5671 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5672 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5673 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5674 #else
5675 #endif
5676 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5677 {
5678   Mat            mat  = *mmat;
5679   PetscInt       m    = *mm, n = *mn;
5680   InsertMode     addv = *maddv;
5681   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5682   PetscScalar    value;
5683   PetscErrorCode ierr;
5684 
5685   MatCheckPreallocated(mat,1);
5686   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5687 
5688 #if defined(PETSC_USE_DEBUG)
5689   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5690 #endif
5691   {
5692     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5693     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5694     PetscBool roworiented = aij->roworiented;
5695 
5696     /* Some Variables required in the macro */
5697     Mat        A                 = aij->A;
5698     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5699     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5700     MatScalar  *aa               = a->a;
5701     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5702     Mat        B                 = aij->B;
5703     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5704     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5705     MatScalar  *ba               = b->a;
5706 
5707     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5708     PetscInt  nonew = a->nonew;
5709     MatScalar *ap1,*ap2;
5710 
5711     PetscFunctionBegin;
5712     for (i=0; i<m; i++) {
5713       if (im[i] < 0) continue;
5714 #if defined(PETSC_USE_DEBUG)
5715       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5716 #endif
5717       if (im[i] >= rstart && im[i] < rend) {
5718         row      = im[i] - rstart;
5719         lastcol1 = -1;
5720         rp1      = aj + ai[row];
5721         ap1      = aa + ai[row];
5722         rmax1    = aimax[row];
5723         nrow1    = ailen[row];
5724         low1     = 0;
5725         high1    = nrow1;
5726         lastcol2 = -1;
5727         rp2      = bj + bi[row];
5728         ap2      = ba + bi[row];
5729         rmax2    = bimax[row];
5730         nrow2    = bilen[row];
5731         low2     = 0;
5732         high2    = nrow2;
5733 
5734         for (j=0; j<n; j++) {
5735           if (roworiented) value = v[i*n+j];
5736           else value = v[i+j*m];
5737           if (in[j] >= cstart && in[j] < cend) {
5738             col = in[j] - cstart;
5739             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5740             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5741           } else if (in[j] < 0) continue;
5742 #if defined(PETSC_USE_DEBUG)
5743           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5744 #endif
5745           else {
5746             if (mat->was_assembled) {
5747               if (!aij->colmap) {
5748                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5749               }
5750 #if defined(PETSC_USE_CTABLE)
5751               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5752               col--;
5753 #else
5754               col = aij->colmap[in[j]] - 1;
5755 #endif
5756               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5757               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5758                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5759                 col  =  in[j];
5760                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5761                 B     = aij->B;
5762                 b     = (Mat_SeqAIJ*)B->data;
5763                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5764                 rp2   = bj + bi[row];
5765                 ap2   = ba + bi[row];
5766                 rmax2 = bimax[row];
5767                 nrow2 = bilen[row];
5768                 low2  = 0;
5769                 high2 = nrow2;
5770                 bm    = aij->B->rmap->n;
5771                 ba    = b->a;
5772               }
5773             } else col = in[j];
5774             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5775           }
5776         }
5777       } else if (!aij->donotstash) {
5778         if (roworiented) {
5779           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5780         } else {
5781           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5782         }
5783       }
5784     }
5785   }
5786   PetscFunctionReturnVoid();
5787 }
5788 
5789