1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
24    enough rows with identical nonzero structure exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
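/*
   A minimal usage sketch (not part of this file; the error-handling context and the local
   size nlocal are assumptions) showing why calling both preallocation routines is harmless:
   whichever of MATSEQAIJ/MATMPIAIJ is actually instantiated simply ignores the other call.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/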
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
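/*
   A usage sketch for the routine above, through the public MatGetColumnNorms() interface
   (collective; every process receives all global column norms, hence the allocation of
   size N below):

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/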
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
218   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the diagonal and off-diagonal nonzero counts */
283       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the diagonal and off-diagonal nonzero counts */
316       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0*/
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
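/*
   A usage sketch for MatDistribute_MPIAIJ() (hypothetical names; gseq is a square MATSEQAIJ
   matrix that only needs to be valid on rank 0, mlocal is this rank's desired row count):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...                              (update the numerical values of gseq on rank 0)
      ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/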
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it, it is not scalable (each processor
407 has an order N integer array) but is fast to access.
408 */
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
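/*
   A lookup sketch mirroring how the colmap built above is consumed later in this file
   (gcol is a hypothetical global column index owned by another process; the map stores
   the local off-diagonal index plus one so that zero can mean "not present"):

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
      if (lcol < 0) ...                (gcol has no allocated entry in the off-diagonal block B)
*/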
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) ap1[_i] += value;   \
444           else                    ap1[_i] = value; \
445           goto a_noinsert; \
446         } \
447       }  \
448       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
449       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
450       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
451       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
452       N = nrow1++ - 1; a->nz++; high1++; \
453       /* shift up all the later entries in this row */ \
454       for (ii=N; ii>=_i; ii--) { \
455         rp1[ii+1] = rp1[ii]; \
456         ap1[ii+1] = ap1[ii]; \
457       } \
458       rp1[_i] = col;  \
459       ap1[_i] = value;  \
460       A->nonzerostate++;\
461       a_noinsert: ; \
462       ailen[row] = nrow1; \
463 }
464 
465 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
466   { \
467     if (col <= lastcol2) low2 = 0;                        \
468     else high2 = nrow2;                                   \
469     lastcol2 = col;                                       \
470     while (high2-low2 > 5) {                              \
471       t = (low2+high2)/2;                                 \
472       if (rp2[t] > col) high2 = t;                        \
473       else             low2  = t;                         \
474     }                                                     \
475     for (_i=low2; _i<high2; _i++) {                       \
476       if (rp2[_i] > col) break;                           \
477       if (rp2[_i] == col) {                               \
478         if (addv == ADD_VALUES) ap2[_i] += value;         \
479         else                    ap2[_i] = value;          \
480         goto b_noinsert;                                  \
481       }                                                   \
482     }                                                     \
483     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
484     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
485     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
486     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
487     N = nrow2++ - 1; b->nz++; high2++;                    \
488     /* shift up all the later entries in this row */      \
489     for (ii=N; ii>=_i; ii--) {                            \
490       rp2[ii+1] = rp2[ii];                                \
491       ap2[ii+1] = ap2[ii];                                \
492     }                                                     \
493     rp2[_i] = col;                                        \
494     ap2[_i] = value;                                      \
495     B->nonzerostate++;                                    \
496     b_noinsert: ;                                         \
497     bilen[row] = nrow2;                                   \
498   }
499 
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
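/*
   The routine above assumes v[] holds one complete local row ordered by global column, i.e.

      [ B entries left of the diagonal block | A (diagonal block) entries | B entries right of the diagonal block ]

   which is why the first PetscMemcpy() fills the leading part of B, the second fills A, and
   the third fills the trailing part of B.
*/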
525 
526 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
527 {
528   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
529   PetscScalar    value;
530   PetscErrorCode ierr;
531   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
532   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
533   PetscBool      roworiented = aij->roworiented;
534 
535   /* Some Variables required in the macro */
536   Mat        A                 = aij->A;
537   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
538   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
539   MatScalar  *aa               = a->a;
540   PetscBool  ignorezeroentries = a->ignorezeroentries;
541   Mat        B                 = aij->B;
542   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
543   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
544   MatScalar  *ba               = b->a;
545 
546   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
547   PetscInt  nonew;
548   MatScalar *ap1,*ap2;
549 
550   PetscFunctionBegin;
551   for (i=0; i<m; i++) {
552     if (im[i] < 0) continue;
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
555 #endif
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = aj + ai[row];
560       ap1      = aa + ai[row];
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = bj + bi[row];
567       ap2      = ba + bi[row];
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j=0; j<n; j++) {
574         if (roworiented) value = v[i*n+j];
575         else             value = v[i+j*m];
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
580           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
581         } else if (in[j] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
584 #endif
585         else {
586           if (mat->was_assembled) {
587             if (!aij->colmap) {
588               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
589             }
590 #if defined(PETSC_USE_CTABLE)
591             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
592             col--;
593 #else
594             col = aij->colmap[in[j]] - 1;
595 #endif
596             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
597               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
598               col  =  in[j];
599               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
600               B     = aij->B;
601               b     = (Mat_SeqAIJ*)B->data;
602               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
603               rp2   = bj + bi[row];
604               ap2   = ba + bi[row];
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0) {
612               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
613                 ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
614               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
619         }
620       }
621     } else {
622       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
638     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
639     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
640 */
641 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
642 {
643   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
644   Mat            A           = aij->A; /* diagonal part of the matrix */
645   Mat            B           = aij->B; /* offdiagonal part of the matrix */
646   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
647   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
648   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
649   PetscInt       *ailen      = a->ilen,*aj = a->j;
650   PetscInt       *bilen      = b->ilen,*bj = b->j;
651   PetscInt       am          = aij->A->rmap->n,j;
652   PetscInt       diag_so_far = 0,dnz;
653   PetscInt       offd_so_far = 0,onz;
654 
655   PetscFunctionBegin;
656   /* Iterate over all rows of the matrix */
657   for (j=0; j<am; j++) {
658     dnz = onz = 0;
659     /*  Iterate over all non-zero columns of the current row */
660     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
661       /* If column is in the diagonal */
662       if (mat_j[col] >= cstart && mat_j[col] < cend) {
663         aj[diag_so_far++] = mat_j[col] - cstart;
664         dnz++;
665       } else { /* off-diagonal entries */
666         bj[offd_so_far++] = mat_j[col];
667         onz++;
668       }
669     }
670     ailen[j] = dnz;
671     bilen[j] = onz;
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 /*
677     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
678     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
679     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
680     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
681     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
682 */
683 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
684 {
685   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
686   Mat            A      = aij->A; /* diagonal part of the matrix */
687   Mat            B      = aij->B; /* offdiagonal part of the matrix */
688   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
689   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
690   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
691   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
692   PetscInt       *ailen = a->ilen,*aj = a->j;
693   PetscInt       *bilen = b->ilen,*bj = b->j;
694   PetscInt       am     = aij->A->rmap->n,j;
695   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
696   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
697   PetscScalar    *aa = a->a,*ba = b->a;
698 
699   PetscFunctionBegin;
700   /* Iterate over all rows of the matrix */
701   for (j=0; j<am; j++) {
702     dnz_row = onz_row = 0;
703     rowstart_offd = full_offd_i[j];
704     rowstart_diag = full_diag_i[j];
705     /*  Iterate over all non-zero columns of the current row */
706     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
707       /* If column is in the diagonal */
708       if (mat_j[col] >= cstart && mat_j[col] < cend) {
709         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
710         aa[rowstart_diag+dnz_row] = mat_a[col];
711         dnz_row++;
712       } else { /* off-diagonal entries */
713         bj[rowstart_offd+onz_row] = mat_j[col];
714         ba[rowstart_offd+onz_row] = mat_a[col];
715         onz_row++;
716       }
717     }
718     ailen[j] = dnz_row;
719     bilen[j] = onz_row;
720   }
721   PetscFunctionReturn(0);
722 }
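/*
   A tiny worked example for the two CopyFromCSRFormat routines above (assumed local data,
   2 local rows, owned columns [cstart,cend) = [0,2)):

      mat_i[] = {0,3,5}
      mat_j[] = {0,1,4,  1,3}
      mat_a[] = {1,2,3,  4,5}

   Row 0 places columns 0,1 (values 1,2) in the diagonal block and column 4 (value 3) in the
   off-diagonal block; row 1 places column 1 (value 4) in the diagonal block and column 3
   (value 5) in the off-diagonal block, giving ailen = {2,1} and bilen = {1,1}.
*/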
723 
724 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
725 {
726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
727   PetscErrorCode ierr;
728   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
729   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
730 
731   PetscFunctionBegin;
732   for (i=0; i<m; i++) {
733     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
734     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
735     if (idxm[i] >= rstart && idxm[i] < rend) {
736       row = idxm[i] - rstart;
737       for (j=0; j<n; j++) {
738         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
739         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
740         if (idxn[j] >= cstart && idxn[j] < cend) {
741           col  = idxn[j] - cstart;
742           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
743         } else {
744           if (!aij->colmap) {
745             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
746           }
747 #if defined(PETSC_USE_CTABLE)
748           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
749           col--;
750 #else
751           col = aij->colmap[idxn[j]] - 1;
752 #endif
753           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
754           else {
755             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
756           }
757         }
758       }
759     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
765 
766 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
767 {
768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
769   PetscErrorCode ierr;
770   PetscInt       nstash,reallocs;
771 
772   PetscFunctionBegin;
773   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
774 
775   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
776   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
777   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
782 {
783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
784   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
785   PetscErrorCode ierr;
786   PetscMPIInt    n;
787   PetscInt       i,j,rstart,ncols,flg;
788   PetscInt       *row,*col;
789   PetscBool      other_disassembled;
790   PetscScalar    *val;
791 
792   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
793 
794   PetscFunctionBegin;
795   if (!aij->donotstash && !mat->nooffprocentries) {
796     while (1) {
797       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
798       if (!flg) break;
799 
800       for (i=0; i<n; ) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         /* Now assemble all these values with a single function call */
808         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
809 
810         i = j;
811       }
812     }
813     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
814   }
815   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
816   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
817 
818   /* determine if any processor has disassembled, if so we must
819      also disassemble ourselves, in order that we may reassemble. */
820   /*
821      if nonzero structure of submatrix B cannot change then we know that
822      no processor disassembled thus we can skip this stuff
823   */
824   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
825     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
826     if (mat->was_assembled && !other_disassembled) {
827       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
828     }
829   }
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
831     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
832   }
833   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
834   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
835   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
836 
837   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
838 
839   aij->rowvalues = 0;
840 
841   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
842   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
843 
844   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
845   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
846     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
847     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
848   }
849   PetscFunctionReturn(0);
850 }
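/*
   The standard assembly pattern that drives the two routines above (a generic sketch, not
   code from this file); any off-process entries passed to MatSetValues() are stashed and
   communicated during the Begin/End pair:

      ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/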
851 
852 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
859   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
864 {
865   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
866   PetscInt      *lrows;
867   PetscInt       r, len;
868   PetscBool      cong;
869   PetscErrorCode ierr;
870 
871   PetscFunctionBegin;
872   /* get locally owned rows */
873   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
874   /* fix right hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
880     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
881     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
882     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
883     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
884   }
885   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
886   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
887   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
888   if ((diag != 0.0) && cong) {
889     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
890   } else if (diag != 0.0) {
891     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
892     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
893     for (r = 0; r < len; ++r) {
894       const PetscInt row = lrows[r] + A->rmap->rstart;
895       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
896     }
897     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
899   } else {
900     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
901   }
902   ierr = PetscFree(lrows);CHKERRQ(ierr);
903 
904   /* only change matrix nonzero state if pattern was allowed to be changed */
905   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
906     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
907     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
908   }
909   PetscFunctionReturn(0);
910 }
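/*
   A usage sketch through the public MatZeroRows() interface (rows holds global row indices;
   x and b are optional and, when both are given, b is adjusted so that the zeroed equations
   enforce the values in x):

      PetscInt rows[] = {0, 7};
      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/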
911 
912 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
913 {
914   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
915   PetscErrorCode    ierr;
916   PetscMPIInt       n = A->rmap->n;
917   PetscInt          i,j,r,m,p = 0,len = 0;
918   PetscInt          *lrows,*owners = A->rmap->range;
919   PetscSFNode       *rrows;
920   PetscSF           sf;
921   const PetscScalar *xx;
922   PetscScalar       *bb,*mask;
923   Vec               xmask,lmask;
924   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
925   const PetscInt    *aj, *ii,*ridx;
926   PetscScalar       *aa;
927 
928   PetscFunctionBegin;
929   /* Create SF where leaves are input rows and roots are owned rows */
930   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
931   for (r = 0; r < n; ++r) lrows[r] = -1;
932   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
933   for (r = 0; r < N; ++r) {
934     const PetscInt idx   = rows[r];
935     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
936     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
937       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
938     }
939     rrows[r].rank  = p;
940     rrows[r].index = rows[r] - owners[p];
941   }
942   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
943   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
944   /* Collect flags for rows to be zeroed */
945   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
947   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
948   /* Compress and put in row numbers */
949   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
950   /* zero diagonal part of matrix */
951   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
952   /* handle off diagonal part of matrix */
953   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
954   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
955   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
956   for (i=0; i<len; i++) bb[lrows[i]] = 1;
957   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
958   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
961   if (x) {
962     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
965     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
966   }
967   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
968   /* remove zeroed rows of off diagonal matrix */
969   ii = aij->i;
970   for (i=0; i<len; i++) {
971     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
972   }
973   /* loop over all elements of off process part of matrix zeroing removed columns*/
974   if (aij->compressedrow.use) {
975     m    = aij->compressedrow.nrows;
976     ii   = aij->compressedrow.i;
977     ridx = aij->compressedrow.rindex;
978     for (i=0; i<m; i++) {
979       n  = ii[i+1] - ii[i];
980       aj = aij->j + ii[i];
981       aa = aij->a + ii[i];
982 
983       for (j=0; j<n; j++) {
984         if (PetscAbsScalar(mask[*aj])) {
985           if (b) bb[*ridx] -= *aa*xx[*aj];
986           *aa = 0.0;
987         }
988         aa++;
989         aj++;
990       }
991       ridx++;
992     }
993   } else { /* do not use compressed row format */
994     m = l->B->rmap->n;
995     for (i=0; i<m; i++) {
996       n  = ii[i+1] - ii[i];
997       aj = aij->j + ii[i];
998       aa = aij->a + ii[i];
999       for (j=0; j<n; j++) {
1000         if (PetscAbsScalar(mask[*aj])) {
1001           if (b) bb[i] -= *aa*xx[*aj];
1002           *aa = 0.0;
1003         }
1004         aa++;
1005         aj++;
1006       }
1007     }
1008   }
1009   if (x) {
1010     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1011     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012   }
1013   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1014   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1015   ierr = PetscFree(lrows);CHKERRQ(ierr);
1016 
1017   /* only change matrix nonzero state if pattern was allowed to be changed */
1018   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1019     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1020     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1026 {
1027   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1028   PetscErrorCode ierr;
1029   PetscInt       nt;
1030   VecScatter     Mvctx = a->Mvctx;
1031 
1032   PetscFunctionBegin;
1033   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1034   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1035 
1036   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1038   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1039   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1044 {
1045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1046   PetscErrorCode ierr;
1047 
1048   PetscFunctionBegin;
1049   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1054 {
1055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1056   PetscErrorCode ierr;
1057   VecScatter     Mvctx = a->Mvctx;
1058 
1059   PetscFunctionBegin;
1060   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1061   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1063   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1064   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   /* do nondiagonal part */
1075   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1076   /* do local part */
1077   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1078   /* add partial results together */
1079   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1085 {
1086   MPI_Comm       comm;
1087   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1088   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1089   IS             Me,Notme;
1090   PetscErrorCode ierr;
1091   PetscInt       M,N,first,last,*notme,i;
1092   PetscBool      lf;
1093   PetscMPIInt    size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1098   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1099   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1100   if (!*f) PetscFunctionReturn(0);
1101   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1103   if (size == 1) PetscFunctionReturn(0);
1104 
1105   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1106   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1107   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1108   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1109   for (i=0; i<first; i++) notme[i] = i;
1110   for (i=last; i<M; i++) notme[i-last+first] = i;
1111   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1112   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1113   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1114   Aoff = Aoffs[0];
1115   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1116   Boff = Boffs[0];
1117   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1118   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1119   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1120   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1121   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1122   ierr = PetscFree(notme);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1127 {
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1136 {
1137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   /* do nondiagonal part */
1142   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1143   /* do local part */
1144   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1145   /* add partial results together */
1146   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 /*
1152   This only works correctly for square matrices where the subblock A->A is the
1153    diagonal block
1154 */
1155 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1156 {
1157   PetscErrorCode ierr;
1158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1159 
1160   PetscFunctionBegin;
1161   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1162   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1163   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1168 {
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1174   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1179 {
1180   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184 #if defined(PETSC_USE_LOG)
1185   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1186 #endif
1187   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1188   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1189   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1190   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1191 #if defined(PETSC_USE_CTABLE)
1192   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1193 #else
1194   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1195 #endif
1196   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1197   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1198   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1199   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1200   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1201   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1202   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1203 
1204   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1205   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1213 #if defined(PETSC_HAVE_ELEMENTAL)
1214   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1215 #endif
1216 #if defined(PETSC_HAVE_HYPRE)
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1219 #endif
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1226 {
1227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1228   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1229   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1230   PetscErrorCode ierr;
1231   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1232   int            fd;
1233   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1234   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1235   PetscScalar    *column_values;
1236   PetscInt       message_count,flowcontrolcount;
1237   FILE           *file;
1238 
1239   PetscFunctionBegin;
1240   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1241   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1242   nz   = A->nz + B->nz;
1243   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1244   if (!rank) {
1245     header[0] = MAT_FILE_CLASSID;
1246     header[1] = mat->rmap->N;
1247     header[2] = mat->cmap->N;
1248 
1249     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1251     /* get largest number of rows any processor has */
1252     rlen  = mat->rmap->n;
1253     range = mat->rmap->range;
1254     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1255   } else {
1256     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1257     rlen = mat->rmap->n;
1258   }
1259 
1260   /* load up the local row counts */
1261   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1262   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1263 
1264   /* store the row lengths to the file */
1265   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1266   if (!rank) {
1267     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1268     for (i=1; i<size; i++) {
1269       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1270       rlen = range[i+1] - range[i];
1271       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1273     }
1274     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1275   } else {
1276     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1277     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1279   }
1280   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1281 
1282   /* load up the local column indices */
1283   nzmax = nz; /* the zeroth process needs as much space as the largest process needs */
1284   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1286   cnt   = 0;
1287   for (i=0; i<mat->rmap->n; i++) {
1288     for (j=B->i[i]; j<B->i[i+1]; j++) {
1289       if ((col = garray[B->j[j]]) > cstart) break;
1290       column_indices[cnt++] = col;
1291     }
1292     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1293     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1294   }
1295   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1296 
1297   /* store the column indices to the file */
1298   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1299   if (!rank) {
1300     MPI_Status status;
1301     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1302     for (i=1; i<size; i++) {
1303       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1304       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1305       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1306       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1307       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1308     }
1309     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1310   } else {
1311     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1312     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1313     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1315   }
1316   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1317 
1318   /* load up the local column values */
1319   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1320   cnt  = 0;
1321   for (i=0; i<mat->rmap->n; i++) {
1322     for (j=B->i[i]; j<B->i[i+1]; j++) {
1323       if (garray[B->j[j]] > cstart) break;
1324       column_values[cnt++] = B->a[j];
1325     }
1326     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1327     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1328   }
1329   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1330 
1331   /* store the column values to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     MPI_Status status;
1335     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1338       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1339       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1340       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1342     }
1343     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1344   } else {
1345     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1346     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1348     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1349   }
1350   ierr = PetscFree(column_values);CHKERRQ(ierr);
1351 
1352   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1353   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #include <petscdraw.h>
1358 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1359 {
1360   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1361   PetscErrorCode    ierr;
1362   PetscMPIInt       rank = aij->rank,size = aij->size;
1363   PetscBool         isdraw,iascii,isbinary;
1364   PetscViewer       sviewer;
1365   PetscViewerFormat format;
1366 
1367   PetscFunctionBegin;
1368   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1369   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1370   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1371   if (iascii) {
1372     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1373     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1374       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1375       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1376       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377       for (i=0; i<(PetscInt)size; i++) {
1378         nmax = PetscMax(nmax,nz[i]);
1379         nmin = PetscMin(nmin,nz[i]);
1380         navg += nz[i];
1381       }
1382       ierr = PetscFree(nz);CHKERRQ(ierr);
1383       navg = navg/size;
1384       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1385       PetscFunctionReturn(0);
1386     }
1387     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1388     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1389       MatInfo   info;
1390       PetscBool inodes;
1391 
1392       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1393       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1394       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1395       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1396       if (!inodes) {
1397         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1398                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1399       } else {
1400         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1401                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1402       }
1403       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1404       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1405       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1406       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1407       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1409       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1410       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1411       PetscFunctionReturn(0);
1412     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1413       PetscInt inodecount,inodelimit,*inodes;
1414       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1415       if (inodes) {
1416         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1417       } else {
1418         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1419       }
1420       PetscFunctionReturn(0);
1421     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1422       PetscFunctionReturn(0);
1423     }
1424   } else if (isbinary) {
1425     if (size == 1) {
1426       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1427       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1428     } else {
1429       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1430     }
1431     PetscFunctionReturn(0);
1432   } else if (iascii && size == 1) {
1433     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1434     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1435     PetscFunctionReturn(0);
1436   } else if (isdraw) {
1437     PetscDraw draw;
1438     PetscBool isnull;
1439     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1440     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1441     if (isnull) PetscFunctionReturn(0);
1442   }
1443 
1444   { /* assemble the entire matrix onto first processor */
1445     Mat A = NULL, Av;
1446     IS  isrow,iscol;
1447 
1448     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1449     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1450     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1451     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1452 /*  The commented code uses MatCreateSubMatrices instead */
1453 /*
1454     Mat *AA, A = NULL, Av;
1455     IS  isrow,iscol;
1456 
1457     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1458     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1459     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1460     if (!rank) {
1461        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1462        A    = AA[0];
1463        Av   = AA[0];
1464     }
1465     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1466 */
1467     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1468     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1469     /*
1470        Everyone has to make these calls to draw the matrix since the graphics waits are
1471        synchronized across all processes that share the PetscDraw object
1472     */
1473     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1474     if (!rank) {
1475       if (((PetscObject)mat)->name) {
1476         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1477       }
1478       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1479     }
1480     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1481     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1482     ierr = MatDestroy(&A);CHKERRQ(ierr);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1488 {
1489   PetscErrorCode ierr;
1490   PetscBool      iascii,isdraw,issocket,isbinary;
1491 
1492   PetscFunctionBegin;
1493   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1494   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1495   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1496   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1497   if (iascii || isdraw || isbinary || issocket) {
1498     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
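/*
   SOR/relaxation for the MPI matrix, built from sweeps on the local diagonal block:
   each outer iteration scatters the current solution into mat->lvec, moves the
   off-process coupling to the right-hand side (bb1 = bb - B*x), and runs the requested
   sweep on mat->A. A truly parallel (global) SOR is not implemented and is rejected below.
*/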
1503 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1504 {
1505   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1506   PetscErrorCode ierr;
1507   Vec            bb1 = 0;
1508   PetscBool      hasop;
1509 
1510   PetscFunctionBegin;
1511   if (flag == SOR_APPLY_UPPER) {
1512     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1513     PetscFunctionReturn(0);
1514   }
1515 
1516   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1517     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1518   }
1519 
1520   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1521     if (flag & SOR_ZERO_INITIAL_GUESS) {
1522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1523       its--;
1524     }
1525 
1526     while (its--) {
1527       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529 
1530       /* update rhs: bb1 = bb - B*x */
1531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1533 
1534       /* local sweep */
1535       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1536     }
1537   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1538     if (flag & SOR_ZERO_INITIAL_GUESS) {
1539       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1540       its--;
1541     }
1542     while (its--) {
1543       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545 
1546       /* update rhs: bb1 = bb - B*x */
1547       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1548       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1549 
1550       /* local sweep */
1551       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1552     }
1553   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1554     if (flag & SOR_ZERO_INITIAL_GUESS) {
1555       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1556       its--;
1557     }
1558     while (its--) {
1559       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1560       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1561 
1562       /* update rhs: bb1 = bb - B*x */
1563       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1564       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1565 
1566       /* local sweep */
1567       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1568     }
1569   } else if (flag & SOR_EISENSTAT) {
1570     Vec xx1;
1571 
1572     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1573     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1574 
1575     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1576     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577     if (!mat->diag) {
1578       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1579       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1580     }
1581     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1582     if (hasop) {
1583       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1584     } else {
1585       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1586     }
1587     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1588 
1589     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1590 
1591     /* local sweep */
1592     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1593     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1594     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1595   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1596 
1597   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1598 
1599   matin->factorerrortype = mat->A->factorerrortype;
1600   PetscFunctionReturn(0);
1601 }
1602 
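/*
   Permutes rows and columns of the MPI matrix. The row and column permutations are
   inverted with PetscSF reductions to find where each locally owned row and column lands,
   the nonzero counts per destination row are gathered for preallocation, and the values
   are then inserted into the permuted matrix with MatSetValues().
*/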
1603 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1604 {
1605   Mat            aA,aB,Aperm;
1606   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1607   PetscScalar    *aa,*ba;
1608   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1609   PetscSF        rowsf,sf;
1610   IS             parcolp = NULL;
1611   PetscBool      done;
1612   PetscErrorCode ierr;
1613 
1614   PetscFunctionBegin;
1615   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1616   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1617   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1618   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1619 
1620   /* Invert row permutation to find out where my rows should go */
1621   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1622   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1623   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1624   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1625   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1626   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1627 
1628   /* Invert column permutation to find out where my columns should go */
1629   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1630   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1631   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1632   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1633   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1634   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1635   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1636 
1637   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1638   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1639   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1640 
1641   /* Find out where my gcols should go */
1642   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1643   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1644   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1645   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1646   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1647   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1649   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1650 
1651   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1652   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1653   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) {
1655     PetscInt row = rdest[i],rowner;
1656     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1657     for (j=ai[i]; j<ai[i+1]; j++) {
1658       PetscInt cowner,col = cdest[aj[j]];
1659       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1660       if (rowner == cowner) dnnz[i]++;
1661       else onnz[i]++;
1662     }
1663     for (j=bi[i]; j<bi[i+1]; j++) {
1664       PetscInt cowner,col = gcdest[bj[j]];
1665       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1666       if (rowner == cowner) dnnz[i]++;
1667       else onnz[i]++;
1668     }
1669   }
1670   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1671   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1672   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1673   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1674   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1675 
1676   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1677   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1678   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1679   for (i=0; i<m; i++) {
1680     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1681     PetscInt j0,rowlen;
1682     rowlen = ai[i+1] - ai[i];
1683     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed m, the length of the scratch arrays, so insert in batches of at most m */
1684       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1685       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1686     }
1687     rowlen = bi[i+1] - bi[i];
1688     for (j0=j=0; j<rowlen; j0=j) {
1689       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1690       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1691     }
1692   }
1693   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1696   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1697   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1698   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1699   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1701   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1702   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1703   *B = Aperm;
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1708 {
1709   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1714   if (ghosts) *ghosts = aij->garray;
1715   PetscFunctionReturn(0);
1716 }
1717 
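/*
   Gathers matrix statistics: the local counts from the diagonal (A) and off-diagonal (B)
   blocks are summed, and for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the totals are reduced
   across the communicator with MPIU_MAX / MPIU_SUM respectively.
*/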
1718 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1719 {
1720   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1721   Mat            A    = mat->A,B = mat->B;
1722   PetscErrorCode ierr;
1723   PetscReal      isend[5],irecv[5];
1724 
1725   PetscFunctionBegin;
1726   info->block_size = 1.0;
1727   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1728 
1729   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1730   isend[3] = info->memory;  isend[4] = info->mallocs;
1731 
1732   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1733 
1734   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1735   isend[3] += info->memory;  isend[4] += info->mallocs;
1736   if (flag == MAT_LOCAL) {
1737     info->nz_used      = isend[0];
1738     info->nz_allocated = isend[1];
1739     info->nz_unneeded  = isend[2];
1740     info->memory       = isend[3];
1741     info->mallocs      = isend[4];
1742   } else if (flag == MAT_GLOBAL_MAX) {
1743     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1744 
1745     info->nz_used      = irecv[0];
1746     info->nz_allocated = irecv[1];
1747     info->nz_unneeded  = irecv[2];
1748     info->memory       = irecv[3];
1749     info->mallocs      = irecv[4];
1750   } else if (flag == MAT_GLOBAL_SUM) {
1751     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1752 
1753     info->nz_used      = irecv[0];
1754     info->nz_allocated = irecv[1];
1755     info->nz_unneeded  = irecv[2];
1756     info->memory       = irecv[3];
1757     info->mallocs      = irecv[4];
1758   }
1759   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1760   info->fill_ratio_needed = 0;
1761   info->factor_mallocs    = 0;
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1766 {
1767   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   switch (op) {
1772   case MAT_NEW_NONZERO_LOCATIONS:
1773   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1774   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1775   case MAT_KEEP_NONZERO_PATTERN:
1776   case MAT_NEW_NONZERO_LOCATION_ERR:
1777   case MAT_USE_INODES:
1778   case MAT_IGNORE_ZERO_ENTRIES:
1779     MatCheckPreallocated(A,1);
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1782     break;
1783   case MAT_ROW_ORIENTED:
1784     MatCheckPreallocated(A,1);
1785     a->roworiented = flg;
1786 
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_NEW_DIAGONALS:
1791     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1792     break;
1793   case MAT_IGNORE_OFF_PROC_ENTRIES:
1794     a->donotstash = flg;
1795     break;
1796   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1797   case MAT_SPD:
1798   case MAT_SYMMETRIC:
1799   case MAT_STRUCTURALLY_SYMMETRIC:
1800   case MAT_HERMITIAN:
1801   case MAT_SYMMETRY_ETERNAL:
1802     break;
1803   case MAT_SUBMAT_SINGLEIS:
1804     A->submat_singleis = flg;
1805     break;
1806   case MAT_STRUCTURE_ONLY:
1807     /* The option is handled directly by MatSetOption() */
1808     break;
1809   default:
1810     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1811   }
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1816 {
1817   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1818   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1819   PetscErrorCode ierr;
1820   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1821   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1822   PetscInt       *cmap,*idx_p;
1823 
1824   PetscFunctionBegin;
1825   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1826   mat->getrowactive = PETSC_TRUE;
1827 
1828   if (!mat->rowvalues && (idx || v)) {
1829     /*
1830         allocate enough space to hold information from the longest row.
1831     */
1832     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1833     PetscInt   max = 1,tmp;
1834     for (i=0; i<matin->rmap->n; i++) {
1835       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1836       if (max < tmp) max = tmp;
1837     }
1838     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1839   }
1840 
1841   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1842   lrow = row - rstart;
1843 
1844   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1845   if (!v)   {pvA = 0; pvB = 0;}
1846   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1847   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1848   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1849   nztot = nzA + nzB;
1850 
1851   cmap = mat->garray;
1852   if (v  || idx) {
1853     if (nztot) {
1854       /* Sort by increasing column numbers, assuming A and B already sorted */
1855       PetscInt imark = -1;
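      /* imark counts how many leading off-diagonal (B) entries have global column < cstart;
         they are placed before the diagonal-block (A) entries so the merged row stays sorted
         by global column */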
1856       if (v) {
1857         *v = v_p = mat->rowvalues;
1858         for (i=0; i<nzB; i++) {
1859           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1860           else break;
1861         }
1862         imark = i;
1863         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1864         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1865       }
1866       if (idx) {
1867         *idx = idx_p = mat->rowindices;
1868         if (imark > -1) {
1869           for (i=0; i<imark; i++) {
1870             idx_p[i] = cmap[cworkB[i]];
1871           }
1872         } else {
1873           for (i=0; i<nzB; i++) {
1874             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1875             else break;
1876           }
1877           imark = i;
1878         }
1879         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1880         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1881       }
1882     } else {
1883       if (idx) *idx = 0;
1884       if (v)   *v   = 0;
1885     }
1886   }
1887   *nz  = nztot;
1888   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1889   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1894 {
1895   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1896 
1897   PetscFunctionBegin;
1898   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1899   aij->getrowactive = PETSC_FALSE;
1900   PetscFunctionReturn(0);
1901 }
1902 
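/*
   Norms are assembled from the two local blocks: the Frobenius norm sums |a_ij|^2 locally
   and takes a global sum; NORM_1 accumulates absolute sums per global column and takes the
   maximum; NORM_INFINITY takes the maximum over local row sums followed by a global maximum.
*/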
1903 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1904 {
1905   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1906   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1907   PetscErrorCode ierr;
1908   PetscInt       i,j,cstart = mat->cmap->rstart;
1909   PetscReal      sum = 0.0;
1910   MatScalar      *v;
1911 
1912   PetscFunctionBegin;
1913   if (aij->size == 1) {
1914     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1915   } else {
1916     if (type == NORM_FROBENIUS) {
1917       v = amat->a;
1918       for (i=0; i<amat->nz; i++) {
1919         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1920       }
1921       v = bmat->a;
1922       for (i=0; i<bmat->nz; i++) {
1923         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1924       }
1925       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1926       *norm = PetscSqrtReal(*norm);
1927       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1928     } else if (type == NORM_1) { /* max column norm */
1929       PetscReal *tmp,*tmp2;
1930       PetscInt  *jj,*garray = aij->garray;
1931       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1932       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1933       *norm = 0.0;
1934       v     = amat->a; jj = amat->j;
1935       for (j=0; j<amat->nz; j++) {
1936         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1937       }
1938       v = bmat->a; jj = bmat->j;
1939       for (j=0; j<bmat->nz; j++) {
1940         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1941       }
1942       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1943       for (j=0; j<mat->cmap->N; j++) {
1944         if (tmp2[j] > *norm) *norm = tmp2[j];
1945       }
1946       ierr = PetscFree(tmp);CHKERRQ(ierr);
1947       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1948       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1949     } else if (type == NORM_INFINITY) { /* max row norm */
1950       PetscReal ntemp = 0.0;
1951       for (j=0; j<aij->A->rmap->n; j++) {
1952         v   = amat->a + amat->i[j];
1953         sum = 0.0;
1954         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1955           sum += PetscAbsScalar(*v); v++;
1956         }
1957         v = bmat->a + bmat->i[j];
1958         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1959           sum += PetscAbsScalar(*v); v++;
1960         }
1961         if (sum > ntemp) ntemp = sum;
1962       }
1963       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1964       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1965     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1966   }
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1971 {
1972   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
1973   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1974   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
1975   PetscErrorCode ierr;
1976   Mat            B,A_diag,*B_diag;
1977   MatScalar      *array;
1978 
1979   PetscFunctionBegin;
1980   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1981   ai = Aloc->i; aj = Aloc->j;
1982   bi = Bloc->i; bj = Bloc->j;
1983   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1984     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1985     PetscSFNode          *oloc;
1986     PETSC_UNUSED PetscSF sf;
1987 
1988     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1989     /* compute d_nnz for preallocation */
1990     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1991     for (i=0; i<ai[ma]; i++) {
1992       d_nnz[aj[i]]++;
1993     }
1994     /* compute local off-diagonal contributions */
1995     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1996     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1997     /* map those to global */
1998     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1999     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2000     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2001     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2002     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2003     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2004     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2005 
2006     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2007     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2008     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2009     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2010     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2011     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2012   } else {
2013     B    = *matout;
2014     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2015   }
2016 
2017   b           = (Mat_MPIAIJ*)B->data;
2018   A_diag      = a->A;
2019   B_diag      = &b->A;
2020   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2021   A_diag_ncol = A_diag->cmap->N;
2022   B_diag_ilen = sub_B_diag->ilen;
2023   B_diag_i    = sub_B_diag->i;
2024 
2025   /* Set ilen for diagonal of B */
2026   for (i=0; i<A_diag_ncol; i++) {
2027     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2028   }
2029 
2030   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2031      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2032   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2033 
2034   /* insert the transposed off-diagonal (B) part: each local row of B becomes a column of the result, using global column indices from garray */
2035   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2036   array = Bloc->a;
2037   row   = A->rmap->rstart;
2038   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2039   cols_tmp = cols;
2040   for (i=0; i<mb; i++) {
2041     ncol = bi[i+1]-bi[i];
2042     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2043     row++;
2044     array += ncol; cols_tmp += ncol;
2045   }
2046   ierr = PetscFree(cols);CHKERRQ(ierr);
2047 
2048   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2049   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2051     *matout = B;
2052   } else {
2053     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2059 {
2060   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2061   Mat            a    = aij->A,b = aij->B;
2062   PetscErrorCode ierr;
2063   PetscInt       s1,s2,s3;
2064 
2065   PetscFunctionBegin;
2066   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2067   if (rr) {
2068     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2069     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2070     /* Overlap communication with computation. */
2071     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072   }
2073   if (ll) {
2074     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2075     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2076     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2077   }
2078   /* scale  the diagonal block */
2079   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2080 
2081   if (rr) {
2082     /* Do a scatter end and then right scale the off-diagonal block */
2083     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2084     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2085   }
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2090 {
2091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2123 
2124   PetscFunctionBegin;
2125   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2126   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2127     /* Because of the column compression in the off-process part of the matrix a->B,
2128        the number of columns in a->B and b->B may differ, hence we cannot call
2129        MatCopy() directly on the two parts. If need be, a copy more efficient than
2130        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2131        and then copying the submatrices */
2132     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2133   } else {
2134     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2135     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2136   }
2137   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2142 {
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    Computes the number of nonzeros per row needed for preallocation when X and Y
2152    have different nonzero structure.
2153 */
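/*
   For illustration (not part of the algorithm itself): with one row of X whose global
   columns are {0, 3, 5} and the corresponding row of Y with global columns {3, 4},
   the merge below counts the union {0, 3, 4, 5}, so nnz for that row is 4.
*/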
2154 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2155 {
2156   PetscInt       i,j,k,nzx,nzy;
2157 
2158   PetscFunctionBegin;
2159   /* Set the number of nonzeros in the new matrix */
2160   for (i=0; i<m; i++) {
2161     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2162     nzx = xi[i+1] - xi[i];
2163     nzy = yi[i+1] - yi[i];
2164     nnz[i] = 0;
2165     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2166       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2167       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2168       nnz[i]++;
2169     }
2170     for (; k<nzy; k++) nnz[i]++;
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2176 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2177 {
2178   PetscErrorCode ierr;
2179   PetscInt       m = Y->rmap->N;
2180   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2181   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
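/*
   Y = a*X + Y. With identical nonzero patterns the value arrays of the diagonal and
   off-diagonal blocks are combined directly with BLAS axpy; with a subset pattern
   MatAXPY_Basic() is used; otherwise a matrix with the union nonzero pattern is
   preallocated and Y is replaced by it.
*/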
2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2189 {
2190   PetscErrorCode ierr;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x    = (Mat_SeqAIJ*)xx->A->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     y    = (Mat_SeqAIJ*)yy->A->data;
2201     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2202     x    = (Mat_SeqAIJ*)xx->B->data;
2203     y    = (Mat_SeqAIJ*)yy->B->data;
2204     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2207   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2208     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2209   } else {
2210     Mat      B;
2211     PetscInt *nnz_d,*nnz_o;
2212     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2213     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2214     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2215     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2216     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2217     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2218     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2219     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2220     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2221     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2222     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2223     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2224     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2225     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2226   }
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2231 
2232 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2233 {
2234 #if defined(PETSC_USE_COMPLEX)
2235   PetscErrorCode ierr;
2236   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2237 
2238   PetscFunctionBegin;
2239   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2240   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2241 #else
2242   PetscFunctionBegin;
2243 #endif
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2248 {
2249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2250   PetscErrorCode ierr;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2254   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2259 {
2260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2265   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2270 {
2271   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2272   PetscErrorCode ierr;
2273   PetscInt       i,*idxb = 0;
2274   PetscScalar    *va,*vb;
2275   Vec            vtmp;
2276 
2277   PetscFunctionBegin;
2278   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2279   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2280   if (idx) {
2281     for (i=0; i<A->rmap->n; i++) {
2282       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2283     }
2284   }
2285 
2286   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2287   if (idx) {
2288     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2289   }
2290   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2291   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2292 
2293   for (i=0; i<A->rmap->n; i++) {
2294     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2295       va[i] = vb[i];
2296       if (idx) idx[i] = a->garray[idxb[i]];
2297     }
2298   }
2299 
2300   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2301   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2302   ierr = PetscFree(idxb);CHKERRQ(ierr);
2303   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2308 {
2309   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2310   PetscErrorCode ierr;
2311   PetscInt       i,*idxb = 0;
2312   PetscScalar    *va,*vb;
2313   Vec            vtmp;
2314 
2315   PetscFunctionBegin;
2316   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2317   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2318   if (idx) {
2319     for (i=0; i<A->rmap->n; i++) {
2320       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2321     }
2322   }
2323 
2324   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2325   if (idx) {
2326     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2327   }
2328   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2329   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2330 
2331   for (i=0; i<A->rmap->n; i++) {
2332     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2333       va[i] = vb[i];
2334       if (idx) idx[i] = a->garray[idxb[i]];
2335     }
2336   }
2337 
2338   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2339   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2340   ierr = PetscFree(idxb);CHKERRQ(ierr);
2341   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2348   PetscInt       n      = A->rmap->n;
2349   PetscInt       cstart = A->cmap->rstart;
2350   PetscInt       *cmap  = mat->garray;
2351   PetscInt       *diagIdx, *offdiagIdx;
2352   Vec            diagV, offdiagV;
2353   PetscScalar    *a, *diagA, *offdiagA;
2354   PetscInt       r;
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2361   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2364   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2365   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2366   for (r = 0; r < n; ++r) {
2367     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2368       a[r]   = diagA[r];
2369       idx[r] = cstart + diagIdx[r];
2370     } else {
2371       a[r]   = offdiagA[r];
2372       idx[r] = cmap[offdiagIdx[r]];
2373     }
2374   }
2375   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2378   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2379   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2380   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2385 {
2386   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2387   PetscInt       n      = A->rmap->n;
2388   PetscInt       cstart = A->cmap->rstart;
2389   PetscInt       *cmap  = mat->garray;
2390   PetscInt       *diagIdx, *offdiagIdx;
2391   Vec            diagV, offdiagV;
2392   PetscScalar    *a, *diagA, *offdiagA;
2393   PetscInt       r;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2398   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2399   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2400   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2401   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2402   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2403   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2404   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2405   for (r = 0; r < n; ++r) {
2406     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2407       a[r]   = diagA[r];
2408       idx[r] = cstart + diagIdx[r];
2409     } else {
2410       a[r]   = offdiagA[r];
2411       idx[r] = cmap[offdiagIdx[r]];
2412     }
2413   }
2414   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2416   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2417   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2418   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2419   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2424 {
2425   PetscErrorCode ierr;
2426   Mat            *dummy;
2427 
2428   PetscFunctionBegin;
2429   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2430   *newmat = *dummy;
2431   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2438   PetscErrorCode ierr;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2442   A->factorerrortype = a->A->factorerrortype;
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2447 {
2448   PetscErrorCode ierr;
2449   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2450 
2451   PetscFunctionBegin;
2452   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2453   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2454   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2455   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2460 {
2461   PetscFunctionBegin;
2462   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2463   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 /*@
2468    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2469 
2470    Collective on Mat
2471 
2472    Input Parameters:
2473 +    A - the matrix
2474 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2475 
2476  Level: advanced
2477 
2478 @*/
2479 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2480 {
2481   PetscErrorCode       ierr;
2482 
2483   PetscFunctionBegin;
2484   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
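/*
   A minimal usage sketch (assuming a matrix A of type MATMPIAIJ has already been created
   and assembled elsewhere): switch to the scalable overlap algorithm before calling
   MatIncreaseOverlap(), e.g. inside a preconditioner setup.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   The same effect is available from the command line with -mat_increase_overlap_scalable,
   as handled in MatSetFromOptions_MPIAIJ() below.
*/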
2487 
2488 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2489 {
2490   PetscErrorCode       ierr;
2491   PetscBool            sc = PETSC_FALSE,flg;
2492 
2493   PetscFunctionBegin;
2494   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2495   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2496   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2497   if (flg) {
2498     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2499   }
2500   ierr = PetscOptionsTail();CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2505 {
2506   PetscErrorCode ierr;
2507   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2508   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2509 
2510   PetscFunctionBegin;
2511   if (!Y->preallocated) {
2512     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2513   } else if (!aij->nz) {
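    /* the diagonal block has no nonzeros yet: preallocate one entry per row so the
       diagonal shift can be inserted, carrying the nonew flag across the preallocation call */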
2514     PetscInt nonew = aij->nonew;
2515     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2516     aij->nonew = nonew;
2517   }
2518   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2523 {
2524   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2525   PetscErrorCode ierr;
2526 
2527   PetscFunctionBegin;
2528   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2529   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2530   if (d) {
2531     PetscInt rstart;
2532     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2533     *d += rstart;
2534 
2535   }
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2540 {
2541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2542   PetscErrorCode ierr;
2543 
2544   PetscFunctionBegin;
2545   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 /* -------------------------------------------------------------------*/
2550 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2551                                        MatGetRow_MPIAIJ,
2552                                        MatRestoreRow_MPIAIJ,
2553                                        MatMult_MPIAIJ,
2554                                 /* 4*/ MatMultAdd_MPIAIJ,
2555                                        MatMultTranspose_MPIAIJ,
2556                                        MatMultTransposeAdd_MPIAIJ,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                 /*10*/ 0,
2561                                        0,
2562                                        0,
2563                                        MatSOR_MPIAIJ,
2564                                        MatTranspose_MPIAIJ,
2565                                 /*15*/ MatGetInfo_MPIAIJ,
2566                                        MatEqual_MPIAIJ,
2567                                        MatGetDiagonal_MPIAIJ,
2568                                        MatDiagonalScale_MPIAIJ,
2569                                        MatNorm_MPIAIJ,
2570                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2571                                        MatAssemblyEnd_MPIAIJ,
2572                                        MatSetOption_MPIAIJ,
2573                                        MatZeroEntries_MPIAIJ,
2574                                 /*24*/ MatZeroRows_MPIAIJ,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                        0,
2579                                 /*29*/ MatSetUp_MPIAIJ,
2580                                        0,
2581                                        0,
2582                                        MatGetDiagonalBlock_MPIAIJ,
2583                                        0,
2584                                 /*34*/ MatDuplicate_MPIAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*39*/ MatAXPY_MPIAIJ,
2590                                        MatCreateSubMatrices_MPIAIJ,
2591                                        MatIncreaseOverlap_MPIAIJ,
2592                                        MatGetValues_MPIAIJ,
2593                                        MatCopy_MPIAIJ,
2594                                 /*44*/ MatGetRowMax_MPIAIJ,
2595                                        MatScale_MPIAIJ,
2596                                        MatShift_MPIAIJ,
2597                                        MatDiagonalSet_MPIAIJ,
2598                                        MatZeroRowsColumns_MPIAIJ,
2599                                 /*49*/ MatSetRandom_MPIAIJ,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2605                                        0,
2606                                        MatSetUnfactored_MPIAIJ,
2607                                        MatPermute_MPIAIJ,
2608                                        0,
2609                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2610                                        MatDestroy_MPIAIJ,
2611                                        MatView_MPIAIJ,
2612                                        0,
2613                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2614                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2615                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2620                                        MatGetRowMinAbs_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*75*/ MatFDColoringApply_AIJ,
2626                                        MatSetFromOptions_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        MatFindZeroDiagonals_MPIAIJ,
2630                                 /*80*/ 0,
2631                                        0,
2632                                        0,
2633                                 /*83*/ MatLoad_MPIAIJ,
2634                                        MatIsSymmetric_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2640                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2641                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2642                                        MatPtAP_MPIAIJ_MPIAIJ,
2643                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2644                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*99*/ 0,
2650                                        0,
2651                                        0,
2652                                        MatConjugate_MPIAIJ,
2653                                        0,
2654                                 /*104*/MatSetValuesRow_MPIAIJ,
2655                                        MatRealPart_MPIAIJ,
2656                                        MatImaginaryPart_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                 /*109*/0,
2660                                        0,
2661                                        MatGetRowMin_MPIAIJ,
2662                                        0,
2663                                        MatMissingDiagonal_MPIAIJ,
2664                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2665                                        0,
2666                                        MatGetGhosts_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                 /*119*/0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        MatGetMultiProcBlock_MPIAIJ,
2674                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2675                                        MatGetColumnNorms_MPIAIJ,
2676                                        MatInvertBlockDiagonal_MPIAIJ,
2677                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2678                                        MatCreateSubMatricesMPI_MPIAIJ,
2679                                 /*129*/0,
2680                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2681                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2682                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2683                                        0,
2684                                 /*134*/0,
2685                                        0,
2686                                        MatRARt_MPIAIJ_MPIAIJ,
2687                                        0,
2688                                        0,
2689                                 /*139*/MatSetBlockSizes_MPIAIJ,
2690                                        0,
2691                                        0,
2692                                        MatFDColoringSetUp_MPIXAIJ,
2693                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2694                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2695 };
2696 
2697 /* ----------------------------------------------------------------------------------------*/
2698 
2699 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2700 {
2701   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2706   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2711 {
2712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2713   PetscErrorCode ierr;
2714 
2715   PetscFunctionBegin;
2716   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2717   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2722 {
2723   Mat_MPIAIJ     *b;
2724   PetscErrorCode ierr;
2725 
2726   PetscFunctionBegin;
2727   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2728   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2729   b = (Mat_MPIAIJ*)B->data;
2730 
2731 #if defined(PETSC_USE_CTABLE)
2732   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2733 #else
2734   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2735 #endif
2736   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2737   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2738   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2739 
2740   /* Because B will have been resized we simply destroy it and create a new one each time */
2741   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2742   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2743   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2744   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2745   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2746   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2747 
2748   if (!B->preallocated) {
2749     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2750     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2751     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2752     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2753     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2754   }
2755 
2756   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2757   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2758   B->preallocated  = PETSC_TRUE;
2759   B->was_assembled = PETSC_FALSE;
2760   B->assembled     = PETSC_FALSE;
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2765 {
2766   Mat_MPIAIJ     *b;
2767   PetscErrorCode ierr;
2768 
2769   PetscFunctionBegin;
2770   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   b = (Mat_MPIAIJ*)B->data;
2774 
2775 #if defined(PETSC_USE_CTABLE)
2776   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2777 #else
2778   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2779 #endif
2780   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2781   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2782   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2783 
2784   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2785   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2786   B->preallocated  = PETSC_TRUE;
2787   B->was_assembled = PETSC_FALSE;
2788   B->assembled = PETSC_FALSE;
2789   PetscFunctionReturn(0);
2790 }
2791 
2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2793 {
2794   Mat            mat;
2795   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   *newmat = 0;
2800   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2801   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2802   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2803   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2804   a       = (Mat_MPIAIJ*)mat->data;
2805 
2806   mat->factortype   = matin->factortype;
2807   mat->assembled    = PETSC_TRUE;
2808   mat->insertmode   = NOT_SET_VALUES;
2809   mat->preallocated = PETSC_TRUE;
2810 
2811   a->size         = oldmat->size;
2812   a->rank         = oldmat->rank;
2813   a->donotstash   = oldmat->donotstash;
2814   a->roworiented  = oldmat->roworiented;
2815   a->rowindices   = 0;
2816   a->rowvalues    = 0;
2817   a->getrowactive = PETSC_FALSE;
2818 
2819   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2821 
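  /* copy the column map of the off-diagonal part (global column number -> local column of B), if it has already been built */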
2822   if (oldmat->colmap) {
2823 #if defined(PETSC_USE_CTABLE)
2824     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2825 #else
2826     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2827     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2828     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2829 #endif
2830   } else a->colmap = 0;
2831   if (oldmat->garray) {
2832     PetscInt len;
2833     len  = oldmat->B->cmap->n;
2834     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2835     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2836     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2837   } else a->garray = 0;
2838 
2839   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2840   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2841   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2842   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2843 
2844   if (oldmat->Mvctx_mpi1) {
2845     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2846     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2847   }
2848 
2849   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2850   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2851   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2852   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2853   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2854   *newmat = mat;
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2859 {
2860   PetscBool      isbinary, ishdf5;
2861   PetscErrorCode ierr;
2862 
2863   PetscFunctionBegin;
2864   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2865   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2866   /* force binary viewer to load .info file if it has not yet done so */
2867   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2868   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2869   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2870   if (isbinary) {
2871     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2872   } else if (ishdf5) {
2873 #if defined(PETSC_HAVE_HDF5)
2874     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2875 #else
2876     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2877 #endif
2878   } else {
2879     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2880   }
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2885 {
2886   PetscScalar    *vals,*svals;
2887   MPI_Comm       comm;
2888   PetscErrorCode ierr;
2889   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2890   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2891   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2892   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2893   PetscInt       cend,cstart,n,*rowners;
2894   int            fd;
2895   PetscInt       bs = newMat->rmap->bs;
2896 
2897   PetscFunctionBegin;
2898   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2900   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2901   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2902   if (!rank) {
2903     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2904     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2904     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2905     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2906   }
2907 
2908   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2909   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2910   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2911   if (bs < 0) bs = 1;
2912 
2913   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2914   M    = header[1]; N = header[2];
2915 
2916   /* If global sizes are set, check if they are consistent with that given in the file */
2917   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2918   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2919 
2920   /* determine ownership of all (block) rows */
2921   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2922   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2923   else m = newMat->rmap->n; /* Set by user */
2924 
2925   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2926   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2927 
2928   /* First process needs enough room for process with most rows */
2929   if (!rank) {
2930     mmax = rowners[1];
2931     for (i=2; i<=size; i++) {
2932       mmax = PetscMax(mmax, rowners[i]);
2933     }
2934   } else mmax = -1;             /* unused, but compilers complain */
2935 
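  /* convert the per-process row counts gathered above into a prefix sum so that
     rowners[p] becomes the first global row owned by process p */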
2936   rowners[0] = 0;
2937   for (i=2; i<=size; i++) {
2938     rowners[i] += rowners[i-1];
2939   }
2940   rstart = rowners[rank];
2941   rend   = rowners[rank+1];
2942 
2943   /* distribute row lengths to all processors */
2944   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2945   if (!rank) {
2946     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2947     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2948     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2949     for (j=0; j<m; j++) {
2950       procsnz[0] += ourlens[j];
2951     }
2952     for (i=1; i<size; i++) {
2953       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2954       /* calculate the number of nonzeros on each processor */
2955       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2956         procsnz[i] += rowlengths[j];
2957       }
2958       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2959     }
2960     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2961   } else {
2962     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2963   }
2964 
2965   if (!rank) {
2966     /* determine max buffer needed and allocate it */
2967     maxnz = 0;
2968     for (i=0; i<size; i++) {
2969       maxnz = PetscMax(maxnz,procsnz[i]);
2970     }
2971     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2972 
2973     /* read in my part of the matrix column indices  */
2974     nz   = procsnz[0];
2975     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2976     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2977 
2978     /* read in everyone else's part and ship it off */
2979     for (i=1; i<size; i++) {
2980       nz   = procsnz[i];
2981       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2982       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2983     }
2984     ierr = PetscFree(cols);CHKERRQ(ierr);
2985   } else {
2986     /* determine buffer space needed for message */
2987     nz = 0;
2988     for (i=0; i<m; i++) {
2989       nz += ourlens[i];
2990     }
2991     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2992 
2993     /* receive message of column indices */
2994     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2995   }
2996 
2997   /* determine column ownership if matrix is not square */
2998   if (N != M) {
2999     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3000     else n = newMat->cmap->n;
3001     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3002     cstart = cend - n;
3003   } else {
3004     cstart = rstart;
3005     cend   = rend;
3006     n      = cend - cstart;
3007   }
3008 
3009   /* loop over local rows, determining the number of off-diagonal entries */
3010   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3011   jj   = 0;
3012   for (i=0; i<m; i++) {
3013     for (j=0; j<ourlens[i]; j++) {
3014       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3015       jj++;
3016     }
3017   }
3018 
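  /* temporarily keep only the diagonal-block lengths in ourlens[] so the preallocation
     below is correct; the full row lengths are restored right after MatMPIAIJSetPreallocation() */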
3019   for (i=0; i<m; i++) {
3020     ourlens[i] -= offlens[i];
3021   }
3022   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3023 
3024   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3025 
3026   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3027 
3028   for (i=0; i<m; i++) {
3029     ourlens[i] += offlens[i];
3030   }
3031 
3032   if (!rank) {
3033     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3034 
3035     /* read in my part of the matrix numerical values  */
3036     nz   = procsnz[0];
3037     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3038 
3039     /* insert into matrix */
3040     jj      = rstart;
3041     smycols = mycols;
3042     svals   = vals;
3043     for (i=0; i<m; i++) {
3044       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3045       smycols += ourlens[i];
3046       svals   += ourlens[i];
3047       jj++;
3048     }
3049 
3050     /* read in other processors and ship out */
3051     for (i=1; i<size; i++) {
3052       nz   = procsnz[i];
3053       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3054       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3055     }
3056     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3057   } else {
3058     /* receive numeric values */
3059     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3060 
3061     /* receive message of values */
3062     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3063 
3064     /* insert into matrix */
3065     jj      = rstart;
3066     smycols = mycols;
3067     svals   = vals;
3068     for (i=0; i<m; i++) {
3069       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3070       smycols += ourlens[i];
3071       svals   += ourlens[i];
3072       jj++;
3073     }
3074   }
3075   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3076   ierr = PetscFree(vals);CHKERRQ(ierr);
3077   ierr = PetscFree(mycols);CHKERRQ(ierr);
3078   ierr = PetscFree(rowners);CHKERRQ(ierr);
3079   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3080   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 /* Not scalable because of ISAllGather() unless getting all columns. */
3085 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3086 {
3087   PetscErrorCode ierr;
3088   IS             iscol_local;
3089   PetscBool      isstride;
3090   PetscMPIInt    lisstride=0,gisstride;
3091 
3092   PetscFunctionBegin;
3093   /* check if we are grabbing all columns */
3094   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3095 
3096   if (isstride) {
3097     PetscInt  start,len,mstart,mlen;
3098     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3099     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3100     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3101     if (mstart == start && mlen-mstart == len) lisstride = 1;
3102   }
3103 
3104   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3105   if (gisstride) {
3106     PetscInt N;
3107     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3108     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3109     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3110     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3111   } else {
3112     PetscInt cbs;
3113     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3114     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3115     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3116   }
3117 
3118   *isseq = iscol_local;
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 /*
3123  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid calling ISAllGather() and creating an iscol_local of global size
3124  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3125 
3126  Input Parameters:
3127    mat - matrix
3128    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3129            i.e., mat->rstart <= isrow[i] < mat->rend
3130    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3131            i.e., mat->cstart <= iscol[i] < mat->cend
3132  Output Parameters:
3133    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3134    iscol_o - sequential column index set for retrieving mat->B
3135    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3136  */
3137 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3138 {
3139   PetscErrorCode ierr;
3140   Vec            x,cmap;
3141   const PetscInt *is_idx;
3142   PetscScalar    *xarray,*cmaparray;
3143   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3144   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3145   Mat            B=a->B;
3146   Vec            lvec=a->lvec,lcmap;
3147   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3148   MPI_Comm       comm;
3149   VecScatter     Mvctx=a->Mvctx;
3150 
3151   PetscFunctionBegin;
3152   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3153   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3154 
3155   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3156   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3157   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3158   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3159   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3160 
3161   /* Get start indices */
3162   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3163   isstart -= ncols;
3164   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3165 
3166   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3167   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3168   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3169   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3170   for (i=0; i<ncols; i++) {
3171     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3172     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3173     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3174   }
3175   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3176   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3177   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3178 
3179   /* Get iscol_d */
3180   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3181   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3182   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3183 
3184   /* Get isrow_d */
3185   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3186   rstart = mat->rmap->rstart;
3187   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3188   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3189   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3190   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3191 
3192   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3193   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3194   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3195 
3196   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3197   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3198   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3199 
3200   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3201 
3202   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3203   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3204 
3205   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3206   /* off-process column indices */
3207   count = 0;
3208   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3209   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3210 
3211   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3212   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3213   for (i=0; i<Bn; i++) {
3214     if (PetscRealPart(xarray[i]) > -1.0) {
3215       idx[count]     = i;                   /* local column index in off-diagonal part B */
3216       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3217       count++;
3218     }
3219   }
3220   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3221   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3222 
3223   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3224   /* cannot ensure iscol_o has same blocksize as iscol! */
3225 
3226   ierr = PetscFree(idx);CHKERRQ(ierr);
3227   *garray = cmap1;
3228 
3229   ierr = VecDestroy(&x);CHKERRQ(ierr);
3230   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3231   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3232   PetscFunctionReturn(0);
3233 }
3234 
3235 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3236 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3237 {
3238   PetscErrorCode ierr;
3239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3240   Mat            M = NULL;
3241   MPI_Comm       comm;
3242   IS             iscol_d,isrow_d,iscol_o;
3243   Mat            Asub = NULL,Bsub = NULL;
3244   PetscInt       n;
3245 
3246   PetscFunctionBegin;
3247   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3248 
3249   if (call == MAT_REUSE_MATRIX) {
3250     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3251     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3252     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3253 
3254     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3255     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3256 
3257     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3258     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3259 
3260     /* Update diagonal and off-diagonal portions of submat */
3261     asub = (Mat_MPIAIJ*)(*submat)->data;
3262     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3263     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3264     if (n) {
3265       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3266     }
3267     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3268     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3269 
3270   } else { /* call == MAT_INITIAL_MATRIX */
3271     const PetscInt *garray;
3272     PetscInt        BsubN;
3273 
3274     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3275     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3276 
3277     /* Create local submatrices Asub and Bsub */
3278     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3279     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3280 
3281     /* Create submatrix M */
3282     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3283 
3284     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3285     asub = (Mat_MPIAIJ*)M->data;
3286 
3287     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3288     n = asub->B->cmap->N;
3289     if (BsubN > n) {
3290       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3291       const PetscInt *idx;
3292       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3293       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3294 
3295       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3296       j = 0;
3297       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3298       for (i=0; i<n; i++) {
3299         if (j >= BsubN) break;
3300         while (subgarray[i] > garray[j]) j++;
3301 
3302         if (subgarray[i] == garray[j]) {
3303           idx_new[i] = idx[j++];
3304         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3305       }
3306       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3307 
3308       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3309       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3310 
3311     } else if (BsubN < n) {
3312       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
3313     }
3314 
3315     ierr = PetscFree(garray);CHKERRQ(ierr);
3316     *submat = M;
3317 
3318     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3319     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3320     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3321 
3322     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3323     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3324 
3325     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3326     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3327   }
3328   PetscFunctionReturn(0);
3329 }
3330 
3331 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3332 {
3333   PetscErrorCode ierr;
3334   IS             iscol_local=NULL,isrow_d;
3335   PetscInt       csize;
3336   PetscInt       n,i,j,start,end;
3337   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3338   MPI_Comm       comm;
3339 
3340   PetscFunctionBegin;
3341   /* If isrow has same processor distribution as mat,
3342      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3343   if (call == MAT_REUSE_MATRIX) {
3344     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3345     if (isrow_d) {
3346       sameRowDist  = PETSC_TRUE;
3347       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3348     } else {
3349       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3350       if (iscol_local) {
3351         sameRowDist  = PETSC_TRUE;
3352         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3353       }
3354     }
3355   } else {
3356     /* Check if isrow has same processor distribution as mat */
3357     sameDist[0] = PETSC_FALSE;
3358     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3359     if (!n) {
3360       sameDist[0] = PETSC_TRUE;
3361     } else {
3362       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3363       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3364       if (i >= start && j < end) {
3365         sameDist[0] = PETSC_TRUE;
3366       }
3367     }
3368 
3369     /* Check if iscol has same processor distribution as mat */
3370     sameDist[1] = PETSC_FALSE;
3371     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3372     if (!n) {
3373       sameDist[1] = PETSC_TRUE;
3374     } else {
3375       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3376       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3377       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3378     }
3379 
3380     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3381     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3382     sameRowDist = tsameDist[0];
3383   }
3384 
3385   if (sameRowDist) {
3386     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3387       /* isrow and iscol have same processor distribution as mat */
3388       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3389       PetscFunctionReturn(0);
3390     } else { /* sameRowDist */
3391       /* isrow has same processor distribution as mat */
3392       if (call == MAT_INITIAL_MATRIX) {
3393         PetscBool sorted;
3394         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3395         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3396         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3397         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3398 
3399         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3400         if (sorted) {
3401           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3402           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3403           PetscFunctionReturn(0);
3404         }
3405       } else { /* call == MAT_REUSE_MATRIX */
3406         IS    iscol_sub;
3407         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3408         if (iscol_sub) {
3409           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3410           PetscFunctionReturn(0);
3411         }
3412       }
3413     }
3414   }
3415 
3416   /* General case: iscol -> iscol_local which has global size of iscol */
3417   if (call == MAT_REUSE_MATRIX) {
3418     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3419     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3420   } else {
3421     if (!iscol_local) {
3422       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3423     }
3424   }
3425 
3426   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3427   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3428 
3429   if (call == MAT_INITIAL_MATRIX) {
3430     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3431     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3432   }
3433   PetscFunctionReturn(0);
3434 }
3435 
3436 /*@C
3437      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3438          and "off-diagonal" parts of the matrix in CSR format.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator
3444 .  A - "diagonal" portion of matrix
3445 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3446 -  garray - global index of B columns
3447 
3448    Output Parameter:
3449 .   mat - the matrix, with input A as its local diagonal matrix
3450    Level: advanced
3451 
3452    Notes:
3453        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3454        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3455 
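   Sample usage (a minimal sketch; Aloc, Bloc and garray are assumed to have been built by the caller,
   with Aloc the sequential "diagonal" block, Bloc the sequential "off-diagonal" block whose column
   indices are local and are mapped to global columns by garray[]):
.vb
   Mat C;
   ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
.ve
   After the call Aloc belongs to C and Bloc has been destroyed, so neither may be used by the caller.
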
3456 .seealso: MatCreateMPIAIJWithSplitArrays()
3457 @*/
3458 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3459 {
3460   PetscErrorCode ierr;
3461   Mat_MPIAIJ     *maij;
3462   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3463   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3464   PetscScalar    *oa=b->a;
3465   Mat            Bnew;
3466   PetscInt       m,n,N;
3467 
3468   PetscFunctionBegin;
3469   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3470   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3471   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3472   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3473   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3474   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3475 
3476   /* Get global columns of mat */
3477   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3478 
3479   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3480   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3481   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3482   maij = (Mat_MPIAIJ*)(*mat)->data;
3483 
3484   (*mat)->preallocated = PETSC_TRUE;
3485 
3486   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3487   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3488 
3489   /* Set A as diagonal portion of *mat */
3490   maij->A = A;
3491 
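  /* replace the local column indices of B by their global column numbers from garray[],
     since Bnew below is created with the global number of columns N */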
3492   nz = oi[m];
3493   for (i=0; i<nz; i++) {
3494     col   = oj[i];
3495     oj[i] = garray[col];
3496   }
3497 
3498    /* Set Bnew as off-diagonal portion of *mat */
3499   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3500   bnew        = (Mat_SeqAIJ*)Bnew->data;
3501   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3502   maij->B     = Bnew;
3503 
3504   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3505 
3506   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3507   b->free_a       = PETSC_FALSE;
3508   b->free_ij      = PETSC_FALSE;
3509   ierr = MatDestroy(&B);CHKERRQ(ierr);
3510 
3511   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3512   bnew->free_a       = PETSC_TRUE;
3513   bnew->free_ij      = PETSC_TRUE;
3514 
3515   /* condense columns of maij->B */
3516   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3517   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3518   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3519   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3520   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3521   PetscFunctionReturn(0);
3522 }
3523 
3524 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3525 
3526 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3527 {
3528   PetscErrorCode ierr;
3529   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3530   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3531   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3532   Mat            M,Msub,B=a->B;
3533   MatScalar      *aa;
3534   Mat_SeqAIJ     *aij;
3535   PetscInt       *garray = a->garray,*colsub,Ncols;
3536   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3537   IS             iscol_sub,iscmap;
3538   const PetscInt *is_idx,*cmap;
3539   PetscBool      allcolumns=PETSC_FALSE;
3540   MPI_Comm       comm;
3541 
3542   PetscFunctionBegin;
3543   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3544 
3545   if (call == MAT_REUSE_MATRIX) {
3546     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3547     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3548     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3549 
3550     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3551     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3552 
3553     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3554     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3555 
3556     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3557 
3558   } else { /* call == MAT_INITIAL_MATRIX */
3559     PetscBool flg;
3560 
3561     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3562     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3563 
3564     /* (1) iscol -> nonscalable iscol_local */
3565     /* Check for special case: each processor gets entire matrix columns */
3566     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3567     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3568     if (allcolumns) {
3569       iscol_sub = iscol_local;
3570       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3571       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3572 
3573     } else {
3574       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3575       PetscInt *idx,*cmap1,k;
3576       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3577       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3578       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3579       count = 0;
3580       k     = 0;
3581       for (i=0; i<Ncols; i++) {
3582         j = is_idx[i];
3583         if (j >= cstart && j < cend) {
3584           /* diagonal part of mat */
3585           idx[count]     = j;
3586           cmap1[count++] = i; /* column index in submat */
3587         } else if (Bn) {
3588           /* off-diagonal part of mat */
3589           if (j == garray[k]) {
3590             idx[count]     = j;
3591             cmap1[count++] = i;  /* column index in submat */
3592           } else if (j > garray[k]) {
3593             while (j > garray[k] && k < Bn-1) k++;
3594             if (j == garray[k]) {
3595               idx[count]     = j;
3596               cmap1[count++] = i; /* column index in submat */
3597             }
3598           }
3599         }
3600       }
3601       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3602 
3603       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3604       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3605       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3606 
3607       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3608     }
3609 
3610     /* (3) Create sequential Msub */
3611     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3612   }
3613 
3614   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3615   aij  = (Mat_SeqAIJ*)(Msub)->data;
3616   ii   = aij->i;
3617   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3618 
3619   /*
3620       m - number of local rows
3621       Ncols - number of columns (same on all processors)
3622       rstart - first row in new global matrix generated
3623   */
3624   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3625 
3626   if (call == MAT_INITIAL_MATRIX) {
3627     /* (4) Create parallel newmat */
3628     PetscMPIInt    rank,size;
3629     PetscInt       csize;
3630 
3631     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3632     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3633 
3634     /*
3635         Determine the number of non-zeros in the diagonal and off-diagonal
3636         portions of the matrix in order to do correct preallocation
3637     */
3638 
3639     /* first get start and end of "diagonal" columns */
3640     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3641     if (csize == PETSC_DECIDE) {
3642       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3643       if (mglobal == Ncols) { /* square matrix */
3644         nlocal = m;
3645       } else {
3646         nlocal = Ncols/size + ((Ncols % size) > rank);
3647       }
3648     } else {
3649       nlocal = csize;
3650     }
3651     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3652     rstart = rend - nlocal;
3653     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3654 
3655     /* next, compute all the lengths */
3656     jj    = aij->j;
3657     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3658     olens = dlens + m;
3659     for (i=0; i<m; i++) {
3660       jend = ii[i+1] - ii[i];
3661       olen = 0;
3662       dlen = 0;
3663       for (j=0; j<jend; j++) {
3664         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3665         else dlen++;
3666         jj++;
3667       }
3668       olens[i] = olen;
3669       dlens[i] = dlen;
3670     }
3671 
3672     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3673     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3674 
3675     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3676     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3677     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3678     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3679     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3680     ierr = PetscFree(dlens);CHKERRQ(ierr);
3681 
3682   } else { /* call == MAT_REUSE_MATRIX */
3683     M    = *newmat;
3684     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3685     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3686     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3687     /*
3688          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3689        rather than the slower MatSetValues().
3690     */
3691     M->was_assembled = PETSC_TRUE;
3692     M->assembled     = PETSC_FALSE;
3693   }
3694 
3695   /* (5) Set values of Msub to *newmat */
3696   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3697   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3698 
3699   jj   = aij->j;
3700   aa   = aij->a;
3701   for (i=0; i<m; i++) {
3702     row = rstart + i;
3703     nz  = ii[i+1] - ii[i];
3704     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3705     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3706     jj += nz; aa += nz;
3707   }
3708   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3709 
3710   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712 
3713   ierr = PetscFree(colsub);CHKERRQ(ierr);
3714 
3715   /* save Msub, iscol_sub and iscmap used in processor for next request */
3716   if (call ==  MAT_INITIAL_MATRIX) {
3717     *newmat = M;
3718     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3719     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3720 
3721     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3722     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3723 
3724     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3725     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3726 
3727     if (iscol_local) {
3728       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3729       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3730     }
3731   }
3732   PetscFunctionReturn(0);
3733 }
3734 
3735 /*
3736     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3737   process, and then the end result by concatenating the local matrices.
3738   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3739 
3740   Note: This requires a sequential iscol with all indices.
3741 */
3742 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3743 {
3744   PetscErrorCode ierr;
3745   PetscMPIInt    rank,size;
3746   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3747   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3748   Mat            M,Mreuse;
3749   MatScalar      *aa,*vwork;
3750   MPI_Comm       comm;
3751   Mat_SeqAIJ     *aij;
3752   PetscBool      colflag,allcolumns=PETSC_FALSE;
3753 
3754   PetscFunctionBegin;
3755   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3756   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3757   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3758 
3759   /* Check for special case: each processor gets entire matrix columns */
3760   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3761   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3762   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3763 
3764   if (call ==  MAT_REUSE_MATRIX) {
3765     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3766     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3767     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3768   } else {
3769     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3770   }
3771 
3772   /*
3773       m - number of local rows
3774       n - number of columns (same on all processors)
3775       rstart - first row in new global matrix generated
3776   */
3777   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3778   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3779   if (call == MAT_INITIAL_MATRIX) {
3780     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3781     ii  = aij->i;
3782     jj  = aij->j;
3783 
3784     /*
3785         Determine the number of non-zeros in the diagonal and off-diagonal
3786         portions of the matrix in order to do correct preallocation
3787     */
3788 
3789     /* first get start and end of "diagonal" columns */
3790     if (csize == PETSC_DECIDE) {
3791       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3792       if (mglobal == n) { /* square matrix */
3793         nlocal = m;
3794       } else {
3795         nlocal = n/size + ((n % size) > rank);
3796       }
3797     } else {
3798       nlocal = csize;
3799     }
3800     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3801     rstart = rend - nlocal;
3802     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3803 
3804     /* next, compute all the lengths */
3805     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3806     olens = dlens + m;
3807     for (i=0; i<m; i++) {
3808       jend = ii[i+1] - ii[i];
3809       olen = 0;
3810       dlen = 0;
3811       for (j=0; j<jend; j++) {
3812         if (*jj < rstart || *jj >= rend) olen++;
3813         else dlen++;
3814         jj++;
3815       }
3816       olens[i] = olen;
3817       dlens[i] = dlen;
3818     }
3819     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3820     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3821     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3822     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3823     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3824     ierr = PetscFree(dlens);CHKERRQ(ierr);
3825   } else {
3826     PetscInt ml,nl;
3827 
3828     M    = *newmat;
3829     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3830     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3831     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3832     /*
3833          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3834        rather than the slower MatSetValues().
3835     */
3836     M->was_assembled = PETSC_TRUE;
3837     M->assembled     = PETSC_FALSE;
3838   }
3839   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3840   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3841   ii   = aij->i;
3842   jj   = aij->j;
3843   aa   = aij->a;
3844   for (i=0; i<m; i++) {
3845     row   = rstart + i;
3846     nz    = ii[i+1] - ii[i];
3847     cwork = jj;     jj += nz;
3848     vwork = aa;     aa += nz;
3849     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3850   }
3851 
3852   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3853   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3854   *newmat = M;
3855 
3856   /* save submatrix used in processor for next request */
3857   if (call ==  MAT_INITIAL_MATRIX) {
3858     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3859     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3860   }
3861   PetscFunctionReturn(0);
3862 }
3863 
3864 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3865 {
3866   PetscInt       m,cstart, cend,j,nnz,i,d;
3867   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3868   const PetscInt *JJ;
3869   PetscScalar    *values;
3870   PetscErrorCode ierr;
3871   PetscBool      nooffprocentries;
3872 
3873   PetscFunctionBegin;
3874   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3875 
3876   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3877   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3878   m      = B->rmap->n;
3879   cstart = B->cmap->rstart;
3880   cend   = B->cmap->rend;
3881   rstart = B->rmap->rstart;
3882 
3883   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3884 
3885 #if defined(PETSC_USE_DEBUG)
3886   for (i=0; i<m && Ii; i++) {
3887     nnz = Ii[i+1]- Ii[i];
3888     JJ  = J + Ii[i];
3889     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3890     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3891     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3892   }
3893 #endif
3894 
3895   for (i=0; i<m && Ii; i++) {
3896     nnz     = Ii[i+1]- Ii[i];
3897     JJ      = J + Ii[i];
3898     nnz_max = PetscMax(nnz_max,nnz);
3899     d       = 0;
3900     for (j=0; j<nnz; j++) {
3901       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3902     }
3903     d_nnz[i] = d;
3904     o_nnz[i] = nnz - d;
3905   }
3906   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3907   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3908 
3909   if (v) values = (PetscScalar*)v;
3910   else {
3911     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3912   }
3913 
3914   for (i=0; i<m && Ii; i++) {
3915     ii   = i + rstart;
3916     nnz  = Ii[i+1]- Ii[i];
3917     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3918   }
3919   nooffprocentries    = B->nooffprocentries;
3920   B->nooffprocentries = PETSC_TRUE;
3921   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3922   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3923   B->nooffprocentries = nooffprocentries;
3924 
3925   if (!v) {
3926     ierr = PetscFree(values);CHKERRQ(ierr);
3927   }
3928   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3929   PetscFunctionReturn(0);
3930 }
3931 
3932 /*@
3933    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3934    (the default parallel PETSc format).
3935 
3936    Collective on MPI_Comm
3937 
3938    Input Parameters:
3939 +  B - the matrix
3940 .  i - the indices into j for the start of each local row (starts with zero)
3941 .  j - the column indices for each local row (starts with zero)
3942 -  v - optional values in the matrix
3943 
3944    Level: developer
3945 
3946    Notes:
3947        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3948      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3949      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3950 
3951        The i and j indices are 0 based, and the i indices are offsets into the local j and value arrays.
3952 
3953        The format used for the sparse matrix input is equivalent to a
3954     row-major ordering, i.e., for the following matrix the expected input data is
3955     as shown:
3956 
3957 $        1 0 0
3958 $        2 0 3     P0
3959 $       -------
3960 $        4 5 6     P1
3961 $
3962 $     Process0 [P0]: rows_owned=[0,1]
3963 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3964 $        j =  {0,0,2}  [size = 3]
3965 $        v =  {1,2,3}  [size = 3]
3966 $
3967 $     Process1 [P1]: rows_owned=[2]
3968 $        i =  {0,3}    [size = nrow+1  = 1+1]
3969 $        j =  {0,1,2}  [size = 3]
3970 $        v =  {4,5,6}  [size = 3]
3971 
3972 .keywords: matrix, aij, compressed row, sparse, parallel
3973 
3974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3975           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3976 @*/
3977 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3978 {
3979   PetscErrorCode ierr;
3980 
3981   PetscFunctionBegin;
3982   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3983   PetscFunctionReturn(0);
3984 }
3985 
3986 /*@C
3987    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3988    (the default parallel PETSc format).  For good matrix assembly performance
3989    the user should preallocate the matrix storage by setting the parameters
3990    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3991    performance can be increased by more than a factor of 50.
3992 
3993    Collective on MPI_Comm
3994 
3995    Input Parameters:
3996 +  B - the matrix
3997 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3998            (same value is used for all local rows)
3999 .  d_nnz - array containing the number of nonzeros in the various rows of the
4000            DIAGONAL portion of the local submatrix (possibly different for each row)
4001            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4002            The size of this array is equal to the number of local rows, i.e 'm'.
4003            For matrices that will be factored, you must leave room for (and set)
4004            the diagonal entry even if it is zero.
4005 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4006            submatrix (same value is used for all local rows).
4007 -  o_nnz - array containing the number of nonzeros in the various rows of the
4008            OFF-DIAGONAL portion of the local submatrix (possibly different for
4009            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4010            structure. The size of this array is equal to the number
4011            of local rows, i.e 'm'.
4012 
4013    If the *_nnz parameter is given then the *_nz parameter is ignored
4014 
4015    The AIJ format (also called the Yale sparse matrix format or
4016    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4017    storage.  The stored row and column indices begin with zero.
4018    See Users-Manual: ch_mat for details.
4019 
4020    The parallel matrix is partitioned such that the first m0 rows belong to
4021    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4022    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4023 
4024    The DIAGONAL portion of the local submatrix of a processor can be defined
4025    as the submatrix which is obtained by extracting the part corresponding to
4026    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4027    first row that belongs to the processor, r2 is the last row belonging to
4028    this processor, and c1-c2 is the range of indices of the local part of a
4029    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4030    common case of a square matrix, the row and column ranges are the same and
4031    the DIAGONAL part is also square. The remaining portion of the local
4032    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4033 
4034    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4035 
4036    You can call MatGetInfo() to get information on how effective the preallocation was;
4037    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4038    You can also run with the option -info and look for messages with the string
4039    malloc in them to see if additional memory allocation was needed.
4040 
4041    Example usage:
4042 
4043    Consider the following 8x8 matrix with 34 non-zero values, that is
4044    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4045    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4046    as follows:
4047 
4048 .vb
4049             1  2  0  |  0  3  0  |  0  4
4050     Proc0   0  5  6  |  7  0  0  |  8  0
4051             9  0 10  | 11  0  0  | 12  0
4052     -------------------------------------
4053            13  0 14  | 15 16 17  |  0  0
4054     Proc1   0 18  0  | 19 20 21  |  0  0
4055             0  0  0  | 22 23  0  | 24  0
4056     -------------------------------------
4057     Proc2  25 26 27  |  0  0 28  | 29  0
4058            30  0  0  | 31 32 33  |  0 34
4059 .ve
4060 
4061    This can be represented as a collection of submatrices as:
4062 
4063 .vb
4064       A B C
4065       D E F
4066       G H I
4067 .ve
4068 
4069    Where the submatrices A,B,C are owned by proc0, D,E,F are
4070    owned by proc1, G,H,I are owned by proc2.
4071 
4072    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4073    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074    The 'M','N' parameters are 8,8, and have the same values on all procs.
4075 
4076    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4077    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4078    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4079    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4080    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4081    matrix, and [DF] as another SeqAIJ matrix.
4082 
4083    When d_nz, o_nz parameters are specified, d_nz storage elements are
4084    allocated for every row of the local diagonal submatrix, and o_nz
4085    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4086    One way to choose d_nz and o_nz is to use the max nonzeros per local
4087    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4088    In this case, the values of d_nz,o_nz are:
4089 .vb
4090      proc0 : dnz = 2, o_nz = 2
4091      proc1 : dnz = 3, o_nz = 2
4092      proc2 : dnz = 1, o_nz = 4
4093 .ve
4094    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4095    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4096    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4097    34 values.
4098 
4099    When d_nnz, o_nnz parameters are specified, the storage is specified
4100    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4101    In the above case the values for d_nnz,o_nnz are:
4102 .vb
4103      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4104      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4105      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4106 .ve
4107    Here the space allocated is the sum of all the above values, i.e., 34, and
4108    hence the preallocation is perfect.
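
   As an illustration only (a minimal sketch under the row distribution of the example above;
   comm denotes the communicator the matrix lives on), the calls made on proc1 could be
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};  /* per-row counts for proc1 above */

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);                         /* 3 local rows and columns of the 8x8 matrix */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve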
4109 
4110    Level: intermediate
4111 
4112 .keywords: matrix, aij, compressed row, sparse, parallel
4113 
4114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4115           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4116 @*/
4117 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4118 {
4119   PetscErrorCode ierr;
4120 
4121   PetscFunctionBegin;
4122   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4123   PetscValidType(B,1);
4124   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4125   PetscFunctionReturn(0);
4126 }
4127 
4128 /*@
4129      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4130          CSR format the local rows.
4131 
4132    Collective on MPI_Comm
4133 
4134    Input Parameters:
4135 +  comm - MPI communicator
4136 .  m - number of local rows (Cannot be PETSC_DECIDE)
4137 .  n - This value should be the same as the local size used in creating the
4138        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4139        it calculated if N is given). For square matrices n is almost always m.
4140 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4141 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4142 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4143 .   j - column indices
4144 -   a - matrix values
4145 
4146    Output Parameter:
4147 .   mat - the matrix
4148 
4149    Level: intermediate
4150 
4151    Notes:
4152        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4153      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4154      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4155 
4156        The i and j indices are 0 based, and the i indices are offsets into the local j and value arrays.
4157 
4158        The format used for the sparse matrix input is equivalent to a
4159     row-major ordering, i.e., for the following matrix the expected input data is
4160     as shown:
4161 
4162 $        1 0 0
4163 $        2 0 3     P0
4164 $       -------
4165 $        4 5 6     P1
4166 $
4167 $     Process0 [P0]: rows_owned=[0,1]
4168 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4169 $        j =  {0,0,2}  [size = 3]
4170 $        v =  {1,2,3}  [size = 3]
4171 $
4172 $     Process1 [P1]: rows_owned=[2]
4173 $        i =  {0,3}    [size = nrow+1  = 1+1]
4174 $        j =  {0,1,2}  [size = 3]
4175 $        v =  {4,5,6}  [size = 3]
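
       A minimal sketch for illustration only (not an excerpt from a PETSc example; comm denotes
     the target communicator): process 0 of the example above could create the matrix with

$     Mat         mat;
$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);

     Process 1 makes the same call with m = 1 and its own i, j, v arrays.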
4176 
4177 .keywords: matrix, aij, compressed row, sparse, parallel
4178 
4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4180           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4181 @*/
4182 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4183 {
4184   PetscErrorCode ierr;
4185 
4186   PetscFunctionBegin;
4187   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4188   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4189   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4190   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4191   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4192   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4193   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4194   PetscFunctionReturn(0);
4195 }
4196 
4197 /*@C
4198    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4199    (the default parallel PETSc format).  For good matrix assembly performance
4200    the user should preallocate the matrix storage by setting the parameters
4201    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4202    performance can be increased by more than a factor of 50.
4203 
4204    Collective on MPI_Comm
4205 
4206    Input Parameters:
4207 +  comm - MPI communicator
4208 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given);
4209            this value should be the same as the local size used in creating the
4210            y vector for the matrix-vector product y = Ax.
4211 .  n - This value should be the same as the local size used in creating the
4212        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4213        it calculated if N is given). For square matrices n is almost always m.
4214 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4215 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4216 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4217            (same value is used for all local rows)
4218 .  d_nnz - array containing the number of nonzeros in the various rows of the
4219            DIAGONAL portion of the local submatrix (possibly different for each row)
4220            or NULL, if d_nz is used to specify the nonzero structure.
4221            The size of this array is equal to the number of local rows, i.e 'm'.
4222 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4223            submatrix (same value is used for all local rows).
4224 -  o_nnz - array containing the number of nonzeros in the various rows of the
4225            OFF-DIAGONAL portion of the local submatrix (possibly different for
4226            each row) or NULL, if o_nz is used to specify the nonzero
4227            structure. The size of this array is equal to the number
4228            of local rows, i.e 'm'.
4229 
4230    Output Parameter:
4231 .  A - the matrix
4232 
4233    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4234    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4235    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4236 
4237    Notes:
4238    If the *_nnz parameter is given then the *_nz parameter is ignored
4239 
4240    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4241    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4242    storage requirements for this matrix.
4243 
4244    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4245    processor then it must be used on all processors that share the object for
4246    that argument.
4247 
4248    The user MUST specify either the local or global matrix dimensions
4249    (possibly both).
4250 
4251    The parallel matrix is partitioned across processors such that the
4252    first m0 rows belong to process 0, the next m1 rows belong to
4253    process 1, the next m2 rows belong to process 2 etc.. where
4254    process 1, the next m2 rows belong to process 2, etc., where
4255    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4256    values corresponding to an [m x N] submatrix.
4257    The columns are logically partitioned with the n0 columns belonging
4258    to 0th partition, the next n1 columns belonging to the next
4259    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4260 
4261    The DIAGONAL portion of the local submatrix on any given processor
4262    is the submatrix spanned by the m rows and n columns owned by
4263    the given processor, i.e., the diagonal matrix on
4264    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4265    etc. The remaining portion of the local submatrix [m x (N-n)]
4266    constitutes the OFF-DIAGONAL portion. The example below better
4267    illustrates this concept.
4268 
4269    For a square global matrix we define each processor's diagonal portion
4270    to be its local rows and the corresponding columns (a square submatrix);
4271    each processor's off-diagonal portion encompasses the remainder of the
4272    local matrix (a rectangular submatrix).
4273 
4274    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4275 
4276    When calling this routine with a single process communicator, a matrix of
4277    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4278    type of communicator, use the construction mechanism
4279 .vb
4280      MatCreate(...,&A);
4281      MatSetType(A,MATMPIAIJ);
4282      MatSetSizes(A, m,n,M,N);
4283      MatMPIAIJSetPreallocation(A,...);
4284 .ve
4287 
4288    By default, this format uses inodes (identical nodes) when possible.
4289    We search for consecutive rows with the same nonzero structure, thereby
4290    reusing matrix information to achieve increased efficiency.
4291 
4292    Options Database Keys:
4293 +  -mat_no_inode  - Do not use inodes
4294 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4295 
4296 
4297 
4298    Example usage:
4299 
4300    Consider the following 8x8 matrix with 34 non-zero values, that is
4301    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4302    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4303    as follows
4304 
4305 .vb
4306             1  2  0  |  0  3  0  |  0  4
4307     Proc0   0  5  6  |  7  0  0  |  8  0
4308             9  0 10  | 11  0  0  | 12  0
4309     -------------------------------------
4310            13  0 14  | 15 16 17  |  0  0
4311     Proc1   0 18  0  | 19 20 21  |  0  0
4312             0  0  0  | 22 23  0  | 24  0
4313     -------------------------------------
4314     Proc2  25 26 27  |  0  0 28  | 29  0
4315            30  0  0  | 31 32 33  |  0 34
4316 .ve
4317 
4318    This can be represented as a collection of submatrices as
4319 
4320 .vb
4321       A B C
4322       D E F
4323       G H I
4324 .ve
4325 
4326    Where the submatrices A,B,C are owned by proc0, D,E,F are
4327    owned by proc1, G,H,I are owned by proc2.
4328 
4329    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4330    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4331    The 'M','N' parameters are 8,8, and have the same values on all procs.
4332 
4333    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4334    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4335    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4336    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4337    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4338    matrix, and [DF] as another SeqAIJ matrix.
4339 
4340    When d_nz, o_nz parameters are specified, d_nz storage elements are
4341    allocated for every row of the local diagonal submatrix, and o_nz
4342    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4343    One way to choose d_nz and o_nz is to use the max nonzeros per local
4344    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4345    In this case, the values of d_nz,o_nz are
4346 .vb
4347      proc0 : dnz = 2, o_nz = 2
4348      proc1 : dnz = 3, o_nz = 2
4349      proc2 : dnz = 1, o_nz = 4
4350 .ve
4351    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4352    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4353    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4354    34 values.
4355 
4356    When d_nnz, o_nnz parameters are specified, the storage is specified
4357    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4358    In the above case the values for d_nnz,o_nnz are
4359 .vb
4360      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4361      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4362      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4363 .ve
4364    Here the space allocated is the sum of all the above values, i.e., 34, and
4365    hence the preallocation is perfect.
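
   As an illustration only (a minimal sketch under the distribution of the example above;
   comm denotes the target communicator), proc0 could create the matrix with
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};  /* per-row counts for proc0 above */

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve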
4366 
4367    Level: intermediate
4368 
4369 .keywords: matrix, aij, compressed row, sparse, parallel
4370 
4371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4372           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4373 @*/
4374 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4375 {
4376   PetscErrorCode ierr;
4377   PetscMPIInt    size;
4378 
4379   PetscFunctionBegin;
4380   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4381   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4382   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4383   if (size > 1) {
4384     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4385     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4386   } else {
4387     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4388     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4389   }
4390   PetscFunctionReturn(0);
4391 }
4392 
4393 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4394 {
4395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4396   PetscBool      flg;
4397   PetscErrorCode ierr;
4398 
4399   PetscFunctionBegin;
4400   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4401   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4402   if (Ad)     *Ad     = a->A;
4403   if (Ao)     *Ao     = a->B;
4404   if (colmap) *colmap = a->garray;
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4409 {
4410   PetscErrorCode ierr;
4411   PetscInt       m,N,i,rstart,nnz,Ii;
4412   PetscInt       *indx;
4413   PetscScalar    *values;
4414 
4415   PetscFunctionBegin;
4416   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4417   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4418     PetscInt       *dnz,*onz,sum,bs,cbs;
4419 
4420     if (n == PETSC_DECIDE) {
4421       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4422     }
4423     /* Check sum(n) = N */
4424     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4425     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4426 
4427     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4428     rstart -= m;
4429 
4430     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4431     for (i=0; i<m; i++) {
4432       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4433       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4434       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4435     }
4436 
4437     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4438     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4439     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4440     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4441     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4442     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4443     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4444     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4445   }
4446 
4447   /* numeric phase */
4448   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4449   for (i=0; i<m; i++) {
4450     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4451     Ii   = i + rstart;
4452     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4453     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4454   }
4455   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4456   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4461 {
4462   PetscErrorCode    ierr;
4463   PetscMPIInt       rank;
4464   PetscInt          m,N,i,rstart,nnz;
4465   size_t            len;
4466   const PetscInt    *indx;
4467   PetscViewer       out;
4468   char              *name;
4469   Mat               B;
4470   const PetscScalar *values;
4471 
4472   PetscFunctionBegin;
4473   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4474   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4475   /* Should this be the type of the diagonal block of A? */
4476   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4477   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4478   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4479   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4480   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4481   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4482   for (i=0; i<m; i++) {
4483     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4484     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4485     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4486   }
4487   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4489 
4490   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4491   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4492   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4493   sprintf(name,"%s.%d",outfile,rank);
4494   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4495   ierr = PetscFree(name);CHKERRQ(ierr);
4496   ierr = MatView(B,out);CHKERRQ(ierr);
4497   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4498   ierr = MatDestroy(&B);CHKERRQ(ierr);
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4503 {
4504   PetscErrorCode      ierr;
4505   Mat_Merge_SeqsToMPI *merge;
4506   PetscContainer      container;
4507 
4508   PetscFunctionBegin;
4509   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4510   if (container) {
4511     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4513     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4514     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4515     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4516     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4517     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4518     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4519     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4520     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4521     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4522     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4523     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4524     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4525     ierr = PetscFree(merge);CHKERRQ(ierr);
4526     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4527   }
4528   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 #include <../src/mat/utils/freespace.h>
4533 #include <petscbt.h>
4534 
4535 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4536 {
4537   PetscErrorCode      ierr;
4538   MPI_Comm            comm;
4539   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4540   PetscMPIInt         size,rank,taga,*len_s;
4541   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4542   PetscInt            proc,m;
4543   PetscInt            **buf_ri,**buf_rj;
4544   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4545   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4546   MPI_Request         *s_waits,*r_waits;
4547   MPI_Status          *status;
4548   MatScalar           *aa=a->a;
4549   MatScalar           **abuf_r,*ba_i;
4550   Mat_Merge_SeqsToMPI *merge;
4551   PetscContainer      container;
4552 
4553   PetscFunctionBegin;
4554   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4555   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4556 
4557   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4558   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4559 
4560   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4561   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4562 
4563   bi     = merge->bi;
4564   bj     = merge->bj;
4565   buf_ri = merge->buf_ri;
4566   buf_rj = merge->buf_rj;
4567 
4568   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4569   owners = merge->rowmap->range;
4570   len_s  = merge->len_s;
4571 
4572   /* send and recv matrix values */
4573   /*-----------------------------*/
4574   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4575   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4576 
4577   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4578   for (proc=0,k=0; proc<size; proc++) {
4579     if (!len_s[proc]) continue;
4580     i    = owners[proc];
4581     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4582     k++;
4583   }
4584 
4585   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4586   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4587   ierr = PetscFree(status);CHKERRQ(ierr);
4588 
4589   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4590   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4591 
4592   /* insert mat values of mpimat */
4593   /*----------------------------*/
4594   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4595   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4596 
4597   for (k=0; k<merge->nrecv; k++) {
4598     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4599     nrows       = *(buf_ri_k[k]);
4600     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4601     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4602   }
4603 
4604   /* set values of ba */
4605   m = merge->rowmap->n;
4606   for (i=0; i<m; i++) {
4607     arow = owners[rank] + i;
4608     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4609     bnzi = bi[i+1] - bi[i];
4610     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4611 
4612     /* add local non-zero vals of this proc's seqmat into ba */
4613     anzi   = ai[arow+1] - ai[arow];
4614     aj     = a->j + ai[arow];
4615     aa     = a->a + ai[arow];
4616     nextaj = 0;
4617     for (j=0; nextaj<anzi; j++) {
4618       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4619         ba_i[j] += aa[nextaj++];
4620       }
4621     }
4622 
4623     /* add received vals into ba */
4624     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4625       /* i-th row */
4626       if (i == *nextrow[k]) {
4627         anzi   = *(nextai[k]+1) - *nextai[k];
4628         aj     = buf_rj[k] + *(nextai[k]);
4629         aa     = abuf_r[k] + *(nextai[k]);
4630         nextaj = 0;
4631         for (j=0; nextaj<anzi; j++) {
4632           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4633             ba_i[j] += aa[nextaj++];
4634           }
4635         }
4636         nextrow[k]++; nextai[k]++;
4637       }
4638     }
4639     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4640   }
4641   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4642   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4643 
4644   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4645   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4646   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4647   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4648   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4653 {
4654   PetscErrorCode      ierr;
4655   Mat                 B_mpi;
4656   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4657   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4658   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4659   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4660   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4661   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4662   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4663   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4664   MPI_Status          *status;
4665   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4666   PetscBT             lnkbt;
4667   Mat_Merge_SeqsToMPI *merge;
4668   PetscContainer      container;
4669 
4670   PetscFunctionBegin;
4671   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4672 
4673   /* make sure it is a PETSc comm */
4674   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4677 
4678   ierr = PetscNew(&merge);CHKERRQ(ierr);
4679   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4680 
4681   /* determine row ownership */
4682   /*---------------------------------------------------------*/
4683   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4684   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4685   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4686   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4687   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4688   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4689   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4690 
4691   m      = merge->rowmap->n;
4692   owners = merge->rowmap->range;
4693 
4694   /* determine the number of messages to send, their lengths */
4695   /*---------------------------------------------------------*/
4696   len_s = merge->len_s;
4697 
4698   len          = 0; /* length of buf_si[] */
4699   merge->nsend = 0;
4700   for (proc=0; proc<size; proc++) {
4701     len_si[proc] = 0;
4702     if (proc == rank) {
4703       len_s[proc] = 0;
4704     } else {
4705       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4706       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j entries) to be sent to [proc] */
4707     }
4708     if (len_s[proc]) {
4709       merge->nsend++;
4710       nrows = 0;
4711       for (i=owners[proc]; i<owners[proc+1]; i++) {
4712         if (ai[i+1] > ai[i]) nrows++;
4713       }
4714       len_si[proc] = 2*(nrows+1);
4715       len         += len_si[proc];
4716     }
4717   }
4718 
4719   /* determine the number and length of messages to receive for ij-structure */
4720   /*-------------------------------------------------------------------------*/
4721   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4722   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4723 
4724   /* post the Irecv of j-structure */
4725   /*-------------------------------*/
4726   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4727   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4728 
4729   /* post the Isend of j-structure */
4730   /*--------------------------------*/
4731   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4732 
4733   for (proc=0, k=0; proc<size; proc++) {
4734     if (!len_s[proc]) continue;
4735     i    = owners[proc];
4736     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4737     k++;
4738   }
4739 
4740   /* receives and sends of j-structure are complete */
4741   /*------------------------------------------------*/
4742   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4743   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4744 
4745   /* send and recv i-structure */
4746   /*---------------------------*/
4747   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4748   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4749 
4750   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4751   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4752   for (proc=0,k=0; proc<size; proc++) {
4753     if (!len_s[proc]) continue;
4754     /* form outgoing message for i-structure:
4755          buf_si[0]:                 nrows to be sent
4756                [1:nrows]:           row index (global)
4757                [nrows+1:2*nrows+1]: i-structure index
4758     */
4759     /*-------------------------------------------*/
4760     nrows       = len_si[proc]/2 - 1;
4761     buf_si_i    = buf_si + nrows+1;
4762     buf_si[0]   = nrows;
4763     buf_si_i[0] = 0;
4764     nrows       = 0;
4765     for (i=owners[proc]; i<owners[proc+1]; i++) {
4766       anzi = ai[i+1] - ai[i];
4767       if (anzi) {
4768         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4769         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4770         nrows++;
4771       }
4772     }
4773     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4774     k++;
4775     buf_si += len_si[proc];
4776   }
4777 
4778   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4779   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4780 
4781   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4782   for (i=0; i<merge->nrecv; i++) {
4783     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4784   }
4785 
4786   ierr = PetscFree(len_si);CHKERRQ(ierr);
4787   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4788   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4789   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4790   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4791   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4792   ierr = PetscFree(status);CHKERRQ(ierr);
4793 
4794   /* compute a local seq matrix in each processor */
4795   /*----------------------------------------------*/
4796   /* allocate bi array and free space for accumulating nonzero column info */
4797   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4798   bi[0] = 0;
4799 
4800   /* create and initialize a linked list */
4801   nlnk = N+1;
4802   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803 
4804   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4805   len  = ai[owners[rank+1]] - ai[owners[rank]];
4806   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4807 
4808   current_space = free_space;
4809 
4810   /* determine symbolic info for each local row */
4811   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4812 
4813   for (k=0; k<merge->nrecv; k++) {
4814     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4815     nrows       = *buf_ri_k[k];
4816     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4817     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4818   }
4819 
4820   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4821   len  = 0;
4822   for (i=0; i<m; i++) {
4823     bnzi = 0;
4824     /* add local non-zero cols of this proc's seqmat into lnk */
4825     arow  = owners[rank] + i;
4826     anzi  = ai[arow+1] - ai[arow];
4827     aj    = a->j + ai[arow];
4828     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4829     bnzi += nlnk;
4830     /* add received col data into lnk */
4831     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4832       if (i == *nextrow[k]) { /* i-th row */
4833         anzi  = *(nextai[k]+1) - *nextai[k];
4834         aj    = buf_rj[k] + *nextai[k];
4835         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4836         bnzi += nlnk;
4837         nextrow[k]++; nextai[k]++;
4838       }
4839     }
4840     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4841 
4842     /* if free space is not available, make more free space */
4843     if (current_space->local_remaining<bnzi) {
4844       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4845       nspacedouble++;
4846     }
4847     /* copy data into free space, then initialize lnk */
4848     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4849     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4850 
4851     current_space->array           += bnzi;
4852     current_space->local_used      += bnzi;
4853     current_space->local_remaining -= bnzi;
4854 
4855     bi[i+1] = bi[i] + bnzi;
4856   }
4857 
4858   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4859 
4860   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4861   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4862   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4863 
4864   /* create symbolic parallel matrix B_mpi */
4865   /*---------------------------------------*/
4866   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4867   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4868   if (n==PETSC_DECIDE) {
4869     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4870   } else {
4871     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4872   }
4873   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4874   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4875   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4876   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4877   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4878 
4879   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4880   B_mpi->assembled    = PETSC_FALSE;
4881   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4882   merge->bi           = bi;
4883   merge->bj           = bj;
4884   merge->buf_ri       = buf_ri;
4885   merge->buf_rj       = buf_rj;
4886   merge->coi          = NULL;
4887   merge->coj          = NULL;
4888   merge->owners_co    = NULL;
4889 
4890   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4891 
4892   /* attach the supporting struct to B_mpi for reuse */
4893   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4894   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4895   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4896   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4897   *mpimat = B_mpi;
4898 
4899   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 /*@C
4904       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4905                  matrices from each processor
4906 
4907     Collective on MPI_Comm
4908 
4909    Input Parameters:
4910 +    comm - the communicator the parallel matrix will live on
4911 .    seqmat - the input sequential matrix
4912 .    m - number of local rows (or PETSC_DECIDE)
4913 .    n - number of local columns (or PETSC_DECIDE)
4914 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4915 
4916    Output Parameter:
4917 .    mpimat - the parallel matrix generated
4918 
4919     Level: advanced
4920 
4921    Notes:
4922      The dimensions of the sequential matrix in each processor MUST be the same.
4923      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4924      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
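
     A minimal usage sketch (for illustration only; seqmat is assumed to be a sequential AIJ matrix
     of identical size on every process of comm):
.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve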
4925 @*/
4926 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4927 {
4928   PetscErrorCode ierr;
4929   PetscMPIInt    size;
4930 
4931   PetscFunctionBegin;
4932   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4933   if (size == 1) {
4934     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935     if (scall == MAT_INITIAL_MATRIX) {
4936       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4937     } else {
4938       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4939     }
4940     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4941     PetscFunctionReturn(0);
4942   }
4943   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4944   if (scall == MAT_INITIAL_MATRIX) {
4945     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4946   }
4947   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4948   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 /*@
4953      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4954           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4955           with MatGetSize().
4956 
4957     Not Collective
4958 
4959    Input Parameters:
4960 +    A - the matrix
4961 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4962 
4963    Output Parameter:
4964 .    A_loc - the local sequential matrix generated
4965 
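   Notes:
     A minimal usage sketch (for illustration only; A is assumed to be a parallel MATMPIAIJ matrix):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary MATSEQAIJ matrix ... */
     MatDestroy(&A_loc);
.ve
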
4966     Level: developer
4967 
4968 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4969 
4970 @*/
4971 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4972 {
4973   PetscErrorCode ierr;
4974   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4975   Mat_SeqAIJ     *mat,*a,*b;
4976   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4977   MatScalar      *aa,*ba,*cam;
4978   PetscScalar    *ca;
4979   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4980   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4981   PetscBool      match;
4982   MPI_Comm       comm;
4983   PetscMPIInt    size;
4984 
4985   PetscFunctionBegin;
4986   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4987   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4988   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4989   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4990   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4991 
4992   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4993   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4994   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4995   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4996   aa = a->a; ba = b->a;
4997   if (scall == MAT_INITIAL_MATRIX) {
4998     if (size == 1) {
4999       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5000       PetscFunctionReturn(0);
5001     }
5002 
5003     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5004     ci[0] = 0;
5005     for (i=0; i<am; i++) {
5006       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5007     }
5008     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5009     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5010     k    = 0;
5011     for (i=0; i<am; i++) {
5012       ncols_o = bi[i+1] - bi[i];
5013       ncols_d = ai[i+1] - ai[i];
5014       /* off-diagonal portion of A */
5015       for (jo=0; jo<ncols_o; jo++) {
5016         col = cmap[*bj];
5017         if (col >= cstart) break;
5018         cj[k]   = col; bj++;
5019         ca[k++] = *ba++;
5020       }
5021       /* diagonal portion of A */
5022       for (j=0; j<ncols_d; j++) {
5023         cj[k]   = cstart + *aj++;
5024         ca[k++] = *aa++;
5025       }
5026       /* off-diagonal portion of A */
5027       for (j=jo; j<ncols_o; j++) {
5028         cj[k]   = cmap[*bj++];
5029         ca[k++] = *ba++;
5030       }
5031     }
5032     /* put together the new matrix */
5033     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5034     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5035     /* Since these are PETSc arrays, change flags to free them as necessary. */
5036     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5037     mat->free_a  = PETSC_TRUE;
5038     mat->free_ij = PETSC_TRUE;
5039     mat->nonew   = 0;
5040   } else if (scall == MAT_REUSE_MATRIX) {
5041     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5042     ci = mat->i; cj = mat->j; cam = mat->a;
5043     for (i=0; i<am; i++) {
5044       /* off-diagonal portion of A: columns left of the diagonal block */
5045       ncols_o = bi[i+1] - bi[i];
5046       for (jo=0; jo<ncols_o; jo++) {
5047         col = cmap[*bj];
5048         if (col >= cstart) break;
5049         *cam++ = *ba++; bj++;
5050       }
5051       /* diagonal portion of A */
5052       ncols_d = ai[i+1] - ai[i];
5053       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5054       /* off-diagonal portion of A: columns right of the diagonal block */
5055       for (j=jo; j<ncols_o; j++) {
5056         *cam++ = *ba++; bj++;
5057       }
5058     }
5059   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5060   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5061   PetscFunctionReturn(0);
5062 }
5063 
5064 /*@C
5065      MatMPIAIJGetLocalMatCondensed - Creates a MATSEQAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5066 
5067     Not Collective
5068 
5069    Input Parameters:
5070 +    A - the matrix
5071 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5072 -    row, col - index sets of rows and columns to extract (or NULL)
5073 
5074    Output Parameter:
5075 .    A_loc - the local sequential matrix generated
5076 
5077     Level: developer
5078 
5079 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5080 
5081 @*/
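/*
   Example usage of MatMPIAIJGetLocalMatCondensed() (a minimal sketch, not part of the library).  Passing NULL for
   row and col selects all local rows and all nonzero columns; A is assumed to be an assembled MATMPIAIJ matrix and
   ierr a PetscErrorCode:

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/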
5082 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5083 {
5084   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5085   PetscErrorCode ierr;
5086   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5087   IS             isrowa,iscola;
5088   Mat            *aloc;
5089   PetscBool      match;
5090 
5091   PetscFunctionBegin;
5092   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5093   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5094   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5095   if (!row) {
5096     start = A->rmap->rstart; end = A->rmap->rend;
5097     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5098   } else {
5099     isrowa = *row;
5100   }
5101   if (!col) {
5102     start = A->cmap->rstart;
5103     cmap  = a->garray;
5104     nzA   = a->A->cmap->n;
5105     nzB   = a->B->cmap->n;
5106     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5107     ncols = 0;
5108     for (i=0; i<nzB; i++) {
5109       if (cmap[i] < start) idx[ncols++] = cmap[i];
5110       else break;
5111     }
5112     imark = i;
5113     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5114     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5115     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5116   } else {
5117     iscola = *col;
5118   }
5119   if (scall != MAT_INITIAL_MATRIX) {
5120     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5121     aloc[0] = *A_loc;
5122   }
5123   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5124   if (!col) { /* attach global id of condensed columns */
5125     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5126   }
5127   *A_loc = aloc[0];
5128   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5129   if (!row) {
5130     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5131   }
5132   if (!col) {
5133     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5134   }
5135   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5136   PetscFunctionReturn(0);
5137 }
5138 
5139 /*@C
5140     MatGetBrowsOfAcols - Creates a MATSEQAIJ matrix containing the rows of B that correspond to the nonzero columns of the local part of A
5141 
5142     Collective on Mat
5143 
5144    Input Parameters:
5145 +    A,B - the matrices in mpiaij format
5146 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5147 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5148 
5149    Output Parameters:
5150 +    rowb, colb - index sets of rows and columns of B to extract
5151 -    B_seq - the sequential matrix generated
5152 
5153     Level: developer
5154 
5155 @*/
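/*
   Example usage of MatGetBrowsOfAcols() (a minimal sketch, not part of the library).  A and B are assumed to be
   assembled MATMPIAIJ matrices with compatible layouts; the index sets returned by the MAT_INITIAL_MATRIX call are
   passed back in when only the numerical values of B have changed, and are destroyed by the caller at the end:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/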
5156 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5157 {
5158   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5159   PetscErrorCode ierr;
5160   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5161   IS             isrowb,iscolb;
5162   Mat            *bseq=NULL;
5163 
5164   PetscFunctionBegin;
5165   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5166     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5167   }
5168   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5169 
5170   if (scall == MAT_INITIAL_MATRIX) {
5171     start = A->cmap->rstart;
5172     cmap  = a->garray;
5173     nzA   = a->A->cmap->n;
5174     nzB   = a->B->cmap->n;
5175     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5176     ncols = 0;
5177     for (i=0; i<nzB; i++) {  /* global rows of B below the locally owned block */
5178       if (cmap[i] < start) idx[ncols++] = cmap[i];
5179       else break;
5180     }
5181     imark = i;
5182     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5183     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the locally owned block */
5184     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5185     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5186   } else {
5187     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5188     isrowb  = *rowb; iscolb = *colb;
5189     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5190     bseq[0] = *B_seq;
5191   }
5192   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5193   *B_seq = bseq[0];
5194   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5195   if (!rowb) {
5196     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5197   } else {
5198     *rowb = isrowb;
5199   }
5200   if (!colb) {
5201     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5202   } else {
5203     *colb = iscolb;
5204   }
5205   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5206   PetscFunctionReturn(0);
5207 }
5208 
5209 /*
5210     MatGetBrowsOfAoCols_MPIAIJ - Creates a MATSEQAIJ matrix containing the rows of B that correspond to the nonzero columns
5211     of the OFF-DIAGONAL portion of the local part of A
5212 
5213     Collective on Mat
5214 
5215    Input Parameters:
5216 +    A,B - the matrices in mpiaij format
5217 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5218 
5219    Output Parameters:
5220 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5221 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5222 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5223 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5224 
5225     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5226      for this matrix. This is not desirable.
5227 
5228     Level: developer
5229 
5230 */
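/*
   Calling pattern for MatGetBrowsOfAoCols_MPIAIJ() (a minimal sketch, not part of the library).  The index and
   message-layout buffers produced by the MAT_INITIAL_MATRIX call are handed back on a MAT_REUSE_MATRIX call so that
   only the numerical values are recommunicated; the caller eventually frees them along with B_oth:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/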
5231 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5232 {
5233   PetscErrorCode         ierr;
5234   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5235   Mat_SeqAIJ             *b_oth;
5236   VecScatter             ctx;
5237   MPI_Comm               comm;
5238   const PetscMPIInt      *rprocs,*sprocs;
5239   const PetscInt         *srow,*rstarts,*sstarts;
5240   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5241   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5242   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5243   MPI_Request            *rwaits = NULL,*swaits = NULL;
5244   MPI_Status             rstatus;
5245   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5246 
5247   PetscFunctionBegin;
5248   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5250 
5251   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5252     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5253   }
5254   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5255   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5256 
5257   if (size == 1) {
5258     if (startsj_s) *startsj_s = NULL;
5259     if (bufa_ptr)  *bufa_ptr  = NULL;
5260     *B_oth    = NULL;
5261     PetscFunctionReturn(0);
5262   }
5263 
5264   ctx = a->Mvctx;
5265   tag = ((PetscObject)ctx)->tag;
5266 
5267   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5268   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5269   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5270   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5271   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5272   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5273   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5274 
5275   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5276   if (scall == MAT_INITIAL_MATRIX) {
5277     /* i-array */
5278     /*---------*/
5279     /*  post receives */
5280     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5281     for (i=0; i<nrecvs; i++) {
5282       rowlen = rvalues + rstarts[i]*rbs;
5283       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5284       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5285     }
5286 
5287     /* pack the outgoing message */
5288     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5289 
5290     sstartsj[0] = 0;
5291     rstartsj[0] = 0;
5292     len         = 0; /* total length of j or a array to be sent */
5293     if (nsends) {
5294       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5295       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5296     }
5297     for (i=0; i<nsends; i++) {
5298       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5299       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5300       for (j=0; j<nrows; j++) {
5301         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5302         for (l=0; l<sbs; l++) {
5303           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5304 
5305           rowlen[j*sbs+l] = ncols;
5306 
5307           len += ncols;
5308           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5309         }
5310         k++;
5311       }
5312       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5313 
5314       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5315     }
5316     /* recvs and sends of i-array are completed */
5317     i = nrecvs;
5318     while (i--) {
5319       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5320     }
5321     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5322     ierr = PetscFree(svalues);CHKERRQ(ierr);
5323 
5324     /* allocate buffers for sending j and a arrays */
5325     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5326     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5327 
5328     /* create i-array of B_oth */
5329     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5330 
5331     b_othi[0] = 0;
5332     len       = 0; /* total length of j or a array to be received */
5333     k         = 0;
5334     for (i=0; i<nrecvs; i++) {
5335       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5336       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5337       for (j=0; j<nrows; j++) {
5338         b_othi[k+1] = b_othi[k] + rowlen[j];
5339         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5340         k++;
5341       }
5342       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5343     }
5344     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5345 
5346     /* allocate space for j and a arrays of B_oth */
5347     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5348     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5349 
5350     /* j-array */
5351     /*---------*/
5352     /*  post receives of j-array */
5353     for (i=0; i<nrecvs; i++) {
5354       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5355       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356     }
5357 
5358     /* pack the outgoing message j-array */
5359     if (nsends) k = sstarts[0];
5360     for (i=0; i<nsends; i++) {
5361       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5362       bufJ  = bufj+sstartsj[i];
5363       for (j=0; j<nrows; j++) {
5364         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5365         for (ll=0; ll<sbs; ll++) {
5366           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5367           for (l=0; l<ncols; l++) {
5368             *bufJ++ = cols[l];
5369           }
5370           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5371         }
5372       }
5373       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5374     }
5375 
5376     /* recvs and sends of j-array are completed */
5377     i = nrecvs;
5378     while (i--) {
5379       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5380     }
5381     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5382   } else if (scall == MAT_REUSE_MATRIX) {
5383     sstartsj = *startsj_s;
5384     rstartsj = *startsj_r;
5385     bufa     = *bufa_ptr;
5386     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5387     b_otha   = b_oth->a;
5388   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5389 
5390   /* a-array */
5391   /*---------*/
5392   /*  post receives of a-array */
5393   for (i=0; i<nrecvs; i++) {
5394     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5395     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5396   }
5397 
5398   /* pack the outgoing message a-array */
5399   if (nsends) k = sstarts[0];
5400   for (i=0; i<nsends; i++) {
5401     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5402     bufA  = bufa+sstartsj[i];
5403     for (j=0; j<nrows; j++) {
5404       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5405       for (ll=0; ll<sbs; ll++) {
5406         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5407         for (l=0; l<ncols; l++) {
5408           *bufA++ = vals[l];
5409         }
5410         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5411       }
5412     }
5413     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5414   }
5415   /* recvs and sends of a-array are completed */
5416   i = nrecvs;
5417   while (i--) {
5418     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5419   }
5420   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5421   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5422 
5423   if (scall == MAT_INITIAL_MATRIX) {
5424     /* put together the new matrix */
5425     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5426 
5427     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5428     /* Since these are PETSc arrays, change flags to free them as necessary. */
5429     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5430     b_oth->free_a  = PETSC_TRUE;
5431     b_oth->free_ij = PETSC_TRUE;
5432     b_oth->nonew   = 0;
5433 
5434     ierr = PetscFree(bufj);CHKERRQ(ierr);
5435     if (!startsj_s || !bufa_ptr) {
5436       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5437       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5438     } else {
5439       *startsj_s = sstartsj;
5440       *startsj_r = rstartsj;
5441       *bufa_ptr  = bufa;
5442     }
5443   }
5444 
5445   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5446   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5447   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5448   PetscFunctionReturn(0);
5449 }
5450 
5451 /*@C
5452   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5453 
5454   Not Collective
5455 
5456   Input Parameter:
5457 . A - The matrix in mpiaij format
5458 
5459   Output Parameters:
5460 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5461 . colmap - A map from global column index to local index into lvec
5462 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5463 
5464   Level: developer
5465 
5466 @*/
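/*
   Example usage of MatGetCommunicationStructs() (a minimal sketch, not part of the library).  The colmap type shown
   assumes PETSC_USE_CTABLE is not defined; the returned objects are owned by the matrix and must not be destroyed by
   the caller:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/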
5467 #if defined(PETSC_USE_CTABLE)
5468 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5469 #else
5470 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5471 #endif
5472 {
5473   Mat_MPIAIJ *a;
5474 
5475   PetscFunctionBegin;
5476   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5477   PetscValidPointer(lvec, 2);
5478   PetscValidPointer(colmap, 3);
5479   PetscValidPointer(multScatter, 4);
5480   a = (Mat_MPIAIJ*) A->data;
5481   if (lvec) *lvec = a->lvec;
5482   if (colmap) *colmap = a->colmap;
5483   if (multScatter) *multScatter = a->Mvctx;
5484   PetscFunctionReturn(0);
5485 }
5486 
5487 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5488 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5489 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5490 #if defined(PETSC_HAVE_MKL_SPARSE)
5491 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5492 #endif
5493 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5494 #if defined(PETSC_HAVE_ELEMENTAL)
5495 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5496 #endif
5497 #if defined(PETSC_HAVE_HYPRE)
5498 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5499 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5500 #endif
5501 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5502 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5503 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5504 
5505 /*
5506     Computes (B'*A')' since computing A*B directly is untenable
5507 
5508                n                       p                          p
5509         (              )       (              )         (                  )
5510       m (      A       )  *  n (       B      )   =   m (         C        )
5511         (              )       (              )         (                  )
5512 
5513 */
5514 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5515 {
5516   PetscErrorCode ierr;
5517   Mat            At,Bt,Ct;
5518 
5519   PetscFunctionBegin;
5520   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5521   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5522   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5523   ierr = MatDestroy(&At);CHKERRQ(ierr);
5524   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5525   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5526   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5527   PetscFunctionReturn(0);
5528 }
5529 
5530 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5531 {
5532   PetscErrorCode ierr;
5533   PetscInt       m=A->rmap->n,n=B->cmap->n;
5534   Mat            Cmat;
5535 
5536   PetscFunctionBegin;
5537   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5538   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5539   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5540   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5541   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5542   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5543   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5544   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5545 
5546   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5547 
5548   *C = Cmat;
5549   PetscFunctionReturn(0);
5550 }
5551 
5552 /* ----------------------------------------------------------------*/
5553 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5554 {
5555   PetscErrorCode ierr;
5556 
5557   PetscFunctionBegin;
5558   if (scall == MAT_INITIAL_MATRIX) {
5559     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5560     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5561     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5562   }
5563   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5564   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5565   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5566   PetscFunctionReturn(0);
5567 }
5568 
5569 /*MC
5570    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5571 
5572    Options Database Keys:
5573 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5574 
5575   Level: beginner
5576 
5577 .seealso: MatCreateAIJ()
5578 M*/
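/*
   Typical creation of a MATMPIAIJ matrix (a minimal sketch, not part of the library; MatCreateAIJ() bundles these
   steps).  The sizes m, n, M, N and the preallocation values are placeholders to be chosen by the application:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/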
5579 
5580 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5581 {
5582   Mat_MPIAIJ     *b;
5583   PetscErrorCode ierr;
5584   PetscMPIInt    size;
5585 
5586   PetscFunctionBegin;
5587   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5588 
5589   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5590   B->data       = (void*)b;
5591   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5592   B->assembled  = PETSC_FALSE;
5593   B->insertmode = NOT_SET_VALUES;
5594   b->size       = size;
5595 
5596   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5597 
5598   /* build cache for off array entries formed */
5599   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5600 
5601   b->donotstash  = PETSC_FALSE;
5602   b->colmap      = 0;
5603   b->garray      = 0;
5604   b->roworiented = PETSC_TRUE;
5605 
5606   /* stuff used for matrix vector multiply */
5607   b->lvec  = NULL;
5608   b->Mvctx = NULL;
5609 
5610   /* stuff for MatGetRow() */
5611   b->rowindices   = 0;
5612   b->rowvalues    = 0;
5613   b->getrowactive = PETSC_FALSE;
5614 
5615   /* flexible pointer used in CUSP/CUSPARSE classes */
5616   b->spptr = NULL;
5617 
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5627   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5628 #if defined(PETSC_HAVE_MKL_SPARSE)
5629   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5630 #endif
5631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5632   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5633 #if defined(PETSC_HAVE_ELEMENTAL)
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5635 #endif
5636 #if defined(PETSC_HAVE_HYPRE)
5637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5638 #endif
5639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5644 #if defined(PETSC_HAVE_HYPRE)
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5646 #endif
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5649   PetscFunctionReturn(0);
5650 }
5651 
5652 /*@C
5653      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5654          and "off-diagonal" parts of the matrix in CSR format.
5655 
5656    Collective on MPI_Comm
5657 
5658    Input Parameters:
5659 +  comm - MPI communicator
5660 .  m - number of local rows (cannot be PETSC_DECIDE)
5661 .  n - number of local columns; this value should be the same as the local size used in creating the
5662        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5663        calculated if N is given). For square matrices n is almost always m.
5664 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5665 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5666 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5667 .   j - column indices
5668 .   a - matrix values
5669 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5670 .   oj - column indices
5671 -   oa - matrix values
5672 
5673    Output Parameter:
5674 .   mat - the matrix
5675 
5676    Level: advanced
5677 
5678    Notes:
5679        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5680        must free the arrays once the matrix has been destroyed and not before.
5681 
5682        The i and j indices are 0 based
5683 
5684        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5685 
5686        This sets local rows and cannot be used to set off-processor values.
5687 
5688        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5689        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5690        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5691        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5692        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5693        communication if it is known that only local entries will be set.
5694 
5695 .keywords: matrix, aij, compressed row, sparse, parallel
5696 
5697 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5698           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5699 @*/
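/*
   Example usage of MatCreateMPIAIJWithSplitArrays() (a minimal sketch, not part of the library).  Two processes each
   own one row and one column of the 2x2 matrix [1 2; 3 4]; rank is assumed to hold the result of MPI_Comm_rank() on
   PETSC_COMM_WORLD.  In this sketch the column indices in j are local to the diagonal block and those in oj are
   global, matching how the arrays are passed to MatCreateSeqAIJWithArrays() in the routine body; the arrays are
   static because the routine does not copy them:

     static PetscInt    i0[] = {0,1},j0[] = {0},oi0[] = {0,1},oj0[] = {1};
     static PetscScalar a0[] = {1.0},oa0[] = {2.0};
     static PetscInt    i1[] = {0,1},j1[] = {0},oi1[] = {0,1},oj1[] = {0};
     static PetscScalar a1[] = {4.0},oa1[] = {3.0};
     Mat                A;

     if (!rank) {
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i0,j0,a0,oi0,oj0,oa0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i1,j1,a1,oi1,oj1,oa1,&A);CHKERRQ(ierr);
     }
*/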
5700 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5701 {
5702   PetscErrorCode ierr;
5703   Mat_MPIAIJ     *maij;
5704 
5705   PetscFunctionBegin;
5706   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5707   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5708   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5709   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5710   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5711   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5712   maij = (Mat_MPIAIJ*) (*mat)->data;
5713 
5714   (*mat)->preallocated = PETSC_TRUE;
5715 
5716   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5717   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5718 
5719   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5720   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5721 
5722   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5723   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5724   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5725   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726 
5727   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5728   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5729   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5730   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5731   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5732   PetscFunctionReturn(0);
5733 }
5734 
5735 /*
5736     Special version for direct calls from Fortran
5737 */
5738 #include <petsc/private/fortranimpl.h>
5739 
5740 /* Change these macros so they can be used in a void function */
5741 #undef CHKERRQ
5742 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5743 #undef SETERRQ2
5744 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5745 #undef SETERRQ3
5746 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5747 #undef SETERRQ
5748 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5749 
5750 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5751 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5752 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5753 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5754 #else
5755 #endif
5756 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5757 {
5758   Mat            mat  = *mmat;
5759   PetscInt       m    = *mm, n = *mn;
5760   InsertMode     addv = *maddv;
5761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5762   PetscScalar    value;
5763   PetscErrorCode ierr;
5764 
5765   MatCheckPreallocated(mat,1);
5766   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5767 
5768 #if defined(PETSC_USE_DEBUG)
5769   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5770 #endif
5771   {
5772     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5773     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5774     PetscBool roworiented = aij->roworiented;
5775 
5776     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5777     Mat        A                 = aij->A;
5778     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5779     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5780     MatScalar  *aa               = a->a;
5781     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5782     Mat        B                 = aij->B;
5783     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5784     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5785     MatScalar  *ba               = b->a;
5786 
5787     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5788     PetscInt  nonew = a->nonew;
5789     MatScalar *ap1,*ap2;
5790 
5791     PetscFunctionBegin;
5792     for (i=0; i<m; i++) {
5793       if (im[i] < 0) continue;
5794 #if defined(PETSC_USE_DEBUG)
5795       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5796 #endif
5797       if (im[i] >= rstart && im[i] < rend) {
5798         row      = im[i] - rstart;
5799         lastcol1 = -1;
5800         rp1      = aj + ai[row];
5801         ap1      = aa + ai[row];
5802         rmax1    = aimax[row];
5803         nrow1    = ailen[row];
5804         low1     = 0;
5805         high1    = nrow1;
5806         lastcol2 = -1;
5807         rp2      = bj + bi[row];
5808         ap2      = ba + bi[row];
5809         rmax2    = bimax[row];
5810         nrow2    = bilen[row];
5811         low2     = 0;
5812         high2    = nrow2;
5813 
5814         for (j=0; j<n; j++) {
5815           if (roworiented) value = v[i*n+j];
5816           else value = v[i+j*m];
5817           if (in[j] >= cstart && in[j] < cend) {
5818             col = in[j] - cstart;
5819             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5820             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5821           } else if (in[j] < 0) continue;
5822 #if defined(PETSC_USE_DEBUG)
5823           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5824           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5825 #endif
5826           else {
5827             if (mat->was_assembled) {
5828               if (!aij->colmap) {
5829                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5830               }
5831 #if defined(PETSC_USE_CTABLE)
5832               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5833               col--;
5834 #else
5835               col = aij->colmap[in[j]] - 1;
5836 #endif
5837               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5838               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5839                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5840                 col  =  in[j];
5841                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5842                 B     = aij->B;
5843                 b     = (Mat_SeqAIJ*)B->data;
5844                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5845                 rp2   = bj + bi[row];
5846                 ap2   = ba + bi[row];
5847                 rmax2 = bimax[row];
5848                 nrow2 = bilen[row];
5849                 low2  = 0;
5850                 high2 = nrow2;
5851                 bm    = aij->B->rmap->n;
5852                 ba    = b->a;
5853               }
5854             } else col = in[j];
5855             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5856           }
5857         }
5858       } else if (!aij->donotstash) {
5859         if (roworiented) {
5860           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5861         } else {
5862           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5863         }
5864       }
5865     }
5866   }
5867   PetscFunctionReturnVoid();
5868 }
5869