xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3d6a3516742e32e485e84f6dcdca1b62f17153b7)
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

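   Example Usage:
   A minimal sketch of typical creation and preallocation; comm, m, n, M, N,
   d_nz, and o_nz are placeholders supplied by the caller, and the estimates
   given to the preallocation routines need only be approximate:
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
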
   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

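   Example Usage:
   A minimal sketch of selecting this format at run time; comm and the sizes
   are placeholders supplied by the caller:
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetFromOptions(A);   /* combine with the option -mat_type aijcrl */
.ve
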
   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

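/*
   A minimal usage sketch for the functionality above (called through the
   public interface MatFindNonzeroRows(); M is an assembled MPIAIJ matrix,
   and keptrows comes back NULL when no process has an all-zero row):

     IS keptrows;
     MatFindNonzeroRows(M,&keptrows);
     if (keptrows) {
       // ... e.g. extract the kept rows with MatCreateSubMatrix() ...
       ISDestroy(&keptrows);
     }
*/
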
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool         cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

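/*
   A minimal usage sketch for the functionality above (called through the
   public interface MatGetColumnNorms(); N is the global number of columns
   of A, and every process receives all N norms):

     PetscReal *norms;
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);
     // ... use norms[0..N-1] ...
     PetscFree(norms);
*/
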
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
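/*
   A minimal usage sketch (gmat is a MATSEQAIJ matrix that is significant only
   on rank 0 of comm, and m is the number of rows this process is to own):

     Mat inmat;
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&inmat);
     // ... later, reuse the layout and move over only new numerical values:
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&inmat);
*/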
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array, but access is fast).
*/
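/*
   A small worked example of the mapping created below (illustrative values):
   if this process couples to global columns 3, 7, and 12 through its
   off-diagonal block, then garray = {3,7,12} and the colmap takes global
   column 7 to local column 1 (stored shifted by one, so that a lookup
   result of 0 can mean "not present").
*/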
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

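/*
   The two macros above share one pattern: narrow the search window by bisection
   while it is large, scan the short window linearly for the column, and on a
   miss shift the tail of the row up one slot to make room.  A standalone sketch
   of that pattern on plain arrays (illustrative only; the macros additionally
   handle reallocation, the nonew modes, and zero-entry filtering, and the row
   is assumed to have spare capacity here):

     static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *len,PetscInt col,PetscScalar value)
     {
       PetscInt lo = 0,hi = *len,t,i,k;
       while (hi-lo > 5) {                  // bisection while the window is large
         t = (lo+hi)/2;
         if (cols[t] > col) hi = t;
         else               lo = t;
       }
       for (i=lo; i<hi; i++) {              // short linear scan
         if (cols[i] == col) {vals[i] = value; return;}
         if (cols[i] >  col) break;
       }
       for (k=*len; k>i; k--) {             // shift the tail up one slot
         cols[k] = cols[k-1]; vals[k] = vals[k-1];
       }
       cols[i] = col; vals[i] = value; (*len)++;
     }
*/
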
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

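/*
   A small worked example of the input layout assumed above (illustrative
   values): for a process owning columns [10,20), a row with global columns
   {2, 5, 11, 14, 27} must be passed in v in that global column order; the
   routine copies v[0..1] into the left part of B, v[2..3] into A, and v[4]
   into the right part of B.
*/
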
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
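/*
   A small worked example of the split performed below (illustrative values):
   with this process owning columns [4,8), a row with global columns {1, 5, 9}
   contributes local column 1 (= 5 - 4) to the diagonal block's j array, while
   the global columns 1 and 9 go, unchanged, into the off-diagonal block's j array.
*/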
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ     *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

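/*
   A minimal usage sketch for the functionality above (called through the public
   interface MatGetValues(); rstart and cstart are illustrative locally owned row
   and column indices. Rows must be owned by this process, and entries outside
   the stored nonzero pattern come back as 0.0):

     PetscInt    rows[1] = {rstart},cols[2] = {cstart,cstart+1};
     PetscScalar vals[2];
     MatGetValues(mat,1,rows,2,cols,vals);   // vals is stored by rows
*/
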
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

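/*
   A minimal sketch of the assembly pattern the two routines above implement
   (standard PETSc usage; values destined for off-process rows are stashed by
   MatSetValues() and exchanged here during assembly):

     MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);   // row may be off-process
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
     // mat is now ready for MatMult() and friends
*/
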
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

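/*
   A minimal usage sketch for the functionality above (called through the public
   interface MatZeroRows(); rows holds global row numbers, and passing the
   solution vector x and right-hand side b makes the routine adjust b so the
   zeroed rows enforce the values in x):

     PetscInt rows[2] = {0,5};         // illustrative global row numbers
     MatZeroRows(A,2,rows,1.0,x,b);    // zero the rows, put 1.0 on the diagonal
*/
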
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any process has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

1312   /* load up the local column indices */
1313   nzmax = nz; /* the zeroth process needs as much space as the process with the largest nonzero count */
1314   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1316   cnt   = 0;
1317   for (i=0; i<mat->rmap->n; i++) {
1318     for (j=B->i[i]; j<B->i[i+1]; j++) {
1319       if ((col = garray[B->j[j]]) > cstart) break;
1320       column_indices[cnt++] = col;
1321     }
1322     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1323     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1324   }
1325   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1326 
1327   /* store the column indices to the file */
1328   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1329   if (!rank) {
1330     MPI_Status status;
1331     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1335       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1336       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1337       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1338     }
1339     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1340   } else {
1341     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1342     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1343     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1347 
1348   /* load up the local column values */
1349   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1350   cnt  = 0;
1351   for (i=0; i<mat->rmap->n; i++) {
1352     for (j=B->i[i]; j<B->i[i+1]; j++) {
1353       if (garray[B->j[j]] > cstart) break;
1354       column_values[cnt++] = B->a[j];
1355     }
1356     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1357     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1358   }
1359   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1360 
1361   /* store the column values to the file */
1362   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1363   if (!rank) {
1364     MPI_Status status;
1365     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1366     for (i=1; i<size; i++) {
1367       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1368       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1369       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1370       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1371       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1372     }
1373     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1374   } else {
1375     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1376     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1379   }
1380   ierr = PetscFree(column_values);CHKERRQ(ierr);
1381 
1382   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1383   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #include <petscdraw.h>
1388 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1389 {
1390   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1391   PetscErrorCode    ierr;
1392   PetscMPIInt       rank = aij->rank,size = aij->size;
1393   PetscBool         isdraw,iascii,isbinary;
1394   PetscViewer       sviewer;
1395   PetscViewerFormat format;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1401   if (iascii) {
1402     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1403     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1404       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1405       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1406       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1407       for (i=0; i<(PetscInt)size; i++) {
1408         nmax = PetscMax(nmax,nz[i]);
1409         nmin = PetscMin(nmin,nz[i]);
1410         navg += nz[i];
1411       }
1412       ierr = PetscFree(nz);CHKERRQ(ierr);
1413       navg = navg/size;
1414       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1415       PetscFunctionReturn(0);
1416     }
1417     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1418     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1419       MatInfo   info;
1420       PetscBool inodes;
1421 
1422       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1423       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1424       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1425       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1426       if (!inodes) {
1427         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1428                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1429       } else {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       }
1433       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1434       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1435       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1436       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1437       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1438       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1440       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1441       PetscFunctionReturn(0);
1442     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1443       PetscInt inodecount,inodelimit,*inodes;
1444       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1445       if (inodes) {
1446         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1447       } else {
1448         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1449       }
1450       PetscFunctionReturn(0);
1451     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1452       PetscFunctionReturn(0);
1453     }
1454   } else if (isbinary) {
1455     if (size == 1) {
1456       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1457       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1458     } else {
1459       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1460     }
1461     PetscFunctionReturn(0);
1462   } else if (iascii && size == 1) {
1463     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1464     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1465     PetscFunctionReturn(0);
1466   } else if (isdraw) {
1467     PetscDraw draw;
1468     PetscBool isnull;
1469     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1470     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1471     if (isnull) PetscFunctionReturn(0);
1472   }
1473 
1474   { /* assemble the entire matrix onto first processor */
1475     Mat A = NULL, Av;
1476     IS  isrow,iscol;
1477 
1478     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1479     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
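    /* rank 0 requests all rows and columns via the stride ISs; the other ranks request
       none, so the gathered submatrix lives entirely on rank 0 */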
1480     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1481     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1482 /*  The commented code uses MatCreateSubMatrices instead */
1483 /*
1484     Mat *AA, A = NULL, Av;
1485     IS  isrow,iscol;
1486 
1487     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1488     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1489     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1490     if (!rank) {
1491        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1492        A    = AA[0];
1493        Av   = AA[0];
1494     }
1495     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1496 */
1497     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1498     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1499     /*
1500        Everyone has to call to draw the matrix since the graphics waits are
1501        synchronized across all processors that share the PetscDraw object
1502     */
1503     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1504     if (!rank) {
1505       if (((PetscObject)mat)->name) {
1506         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       }
1508       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1509     }
1510     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1511     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1512     ierr = MatDestroy(&A);CHKERRQ(ierr);
1513   }
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1518 {
1519   PetscErrorCode ierr;
1520   PetscBool      iascii,isdraw,issocket,isbinary;
1521 
1522   PetscFunctionBegin;
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1527   if (iascii || isdraw || isbinary || issocket) {
1528     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1534 {
1535   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1536   PetscErrorCode ierr;
1537   Vec            bb1 = 0;
1538   PetscBool      hasop;
1539 
1540   PetscFunctionBegin;
1541   if (flag == SOR_APPLY_UPPER) {
1542     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1543     PetscFunctionReturn(0);
1544   }
1545 
1546   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1547     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1548   }
1549 
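  /* Each parallel iteration below scatters the current solution into the ghost vector
     lvec, forms the corrected right-hand side bb1 = bb - B*lvec, and then runs the
     requested local sweep on the diagonal block A only */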
1550   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1551     if (flag & SOR_ZERO_INITIAL_GUESS) {
1552       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1553       its--;
1554     }
1555 
1556     while (its--) {
1557       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1559 
1560       /* update rhs: bb1 = bb - B*x */
1561       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1562       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1563 
1564       /* local sweep */
1565       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1566     }
1567   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1568     if (flag & SOR_ZERO_INITIAL_GUESS) {
1569       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1570       its--;
1571     }
1572     while (its--) {
1573       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1575 
1576       /* update rhs: bb1 = bb - B*x */
1577       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1578       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1579 
1580       /* local sweep */
1581       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1582     }
1583   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1584     if (flag & SOR_ZERO_INITIAL_GUESS) {
1585       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1586       its--;
1587     }
1588     while (its--) {
1589       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591 
1592       /* update rhs: bb1 = bb - B*x */
1593       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1594       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1595 
1596       /* local sweep */
1597       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1598     }
1599   } else if (flag & SOR_EISENSTAT) {
1600     Vec xx1;
1601 
1602     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
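    /* Eisenstat's trick: a local backward sweep with zero initial guess produces xx; then
       bb1 = bb + ((omega-2)/omega)*D*xx + B*lvec is assembled, and a local forward sweep
       on bb1 produces the correction xx1, which is added to xx */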
1603     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1604 
1605     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     if (!mat->diag) {
1608       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1609       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1610     }
1611     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1612     if (hasop) {
1613       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1614     } else {
1615       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1616     }
1617     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1618 
1619     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1620 
1621     /* local sweep */
1622     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1623     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1624     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1625   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1626 
1627   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1628 
1629   matin->factorerrortype = mat->A->factorerrortype;
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1634 {
1635   Mat            aA,aB,Aperm;
1636   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1637   PetscScalar    *aa,*ba;
1638   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1639   PetscSF        rowsf,sf;
1640   IS             parcolp = NULL;
1641   PetscBool      done;
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1646   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1647   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1648   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1649 
1650   /* Invert row permutation to find out where my rows should go */
1651   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1652   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1653   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1655   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
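  /* after the reduce, rdest[i] is the global row of the permuted matrix that receives
     local row i, i.e., the inverse of the row permutation */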
1657 
1658   /* Invert column permutation to find out where my columns should go */
1659   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1660   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1661   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1662   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1663   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1665   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1666 
1667   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1668   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1669   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1670 
1671   /* Find out where my gcols should go */
1672   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1673   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1674   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1675   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1676   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1677   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1679   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1680 
1681   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1683   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1684   for (i=0; i<m; i++) {
1685     PetscInt row = rdest[i],rowner;
1686     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1687     for (j=ai[i]; j<ai[i+1]; j++) {
1688       PetscInt cowner,col = cdest[aj[j]];
1689       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1690       if (rowner == cowner) dnnz[i]++;
1691       else onnz[i]++;
1692     }
1693     for (j=bi[i]; j<bi[i+1]; j++) {
1694       PetscInt cowner,col = gcdest[bj[j]];
1695       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1696       if (rowner == cowner) dnnz[i]++;
1697       else onnz[i]++;
1698     }
1699   }
1700   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1704   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1705 
1706   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1708   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1709   for (i=0; i<m; i++) {
1710     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1711     PetscInt j0,rowlen;
1712     rowlen = ai[i+1] - ai[i];
1713     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed buffers, so insert in batches of at most m */
1714       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1715       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1716     }
1717     rowlen = bi[i+1] - bi[i];
1718     for (j0=j=0; j<rowlen; j0=j) {
1719       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1720       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1721     }
1722   }
1723   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1726   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1728   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1729   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1730   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1731   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1732   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1733   *B = Aperm;
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1738 {
1739   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1744   if (ghosts) *ghosts = aij->garray;
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1749 {
1750   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1751   Mat            A    = mat->A,B = mat->B;
1752   PetscErrorCode ierr;
1753   PetscReal      isend[5],irecv[5];
1754 
1755   PetscFunctionBegin;
1756   info->block_size = 1.0;
1757   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1758 
1759   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1760   isend[3] = info->memory;  isend[4] = info->mallocs;
1761 
1762   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1763 
1764   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1765   isend[3] += info->memory;  isend[4] += info->mallocs;
1766   if (flag == MAT_LOCAL) {
1767     info->nz_used      = isend[0];
1768     info->nz_allocated = isend[1];
1769     info->nz_unneeded  = isend[2];
1770     info->memory       = isend[3];
1771     info->mallocs      = isend[4];
1772   } else if (flag == MAT_GLOBAL_MAX) {
1773     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1774 
1775     info->nz_used      = irecv[0];
1776     info->nz_allocated = irecv[1];
1777     info->nz_unneeded  = irecv[2];
1778     info->memory       = irecv[3];
1779     info->mallocs      = irecv[4];
1780   } else if (flag == MAT_GLOBAL_SUM) {
1781     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1782 
1783     info->nz_used      = irecv[0];
1784     info->nz_allocated = irecv[1];
1785     info->nz_unneeded  = irecv[2];
1786     info->memory       = irecv[3];
1787     info->mallocs      = irecv[4];
1788   }
1789   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1790   info->fill_ratio_needed = 0;
1791   info->factor_mallocs    = 0;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1796 {
1797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   switch (op) {
1802   case MAT_NEW_NONZERO_LOCATIONS:
1803   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805   case MAT_KEEP_NONZERO_PATTERN:
1806   case MAT_NEW_NONZERO_LOCATION_ERR:
1807   case MAT_USE_INODES:
1808   case MAT_IGNORE_ZERO_ENTRIES:
1809     MatCheckPreallocated(A,1);
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_ROW_ORIENTED:
1814     MatCheckPreallocated(A,1);
1815     a->roworiented = flg;
1816 
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_NEW_DIAGONALS:
1821     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1822     break;
1823   case MAT_IGNORE_OFF_PROC_ENTRIES:
1824     a->donotstash = flg;
1825     break;
1826   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1827   case MAT_SPD:
1828   case MAT_SYMMETRIC:
1829   case MAT_STRUCTURALLY_SYMMETRIC:
1830   case MAT_HERMITIAN:
1831   case MAT_SYMMETRY_ETERNAL:
1832     break;
1833   case MAT_SUBMAT_SINGLEIS:
1834     A->submat_singleis = flg;
1835     break;
1836   case MAT_STRUCTURE_ONLY:
1837     /* The option is handled directly by MatSetOption() */
1838     break;
1839   default:
1840     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1841   }
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1848   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1849   PetscErrorCode ierr;
1850   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1851   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1852   PetscInt       *cmap,*idx_p;
1853 
1854   PetscFunctionBegin;
1855   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1856   mat->getrowactive = PETSC_TRUE;
1857 
1858   if (!mat->rowvalues && (idx || v)) {
1859     /*
1860         allocate enough space to hold information from the longest row.
1861     */
1862     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1863     PetscInt   max = 1,tmp;
1864     for (i=0; i<matin->rmap->n; i++) {
1865       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1866       if (max < tmp) max = tmp;
1867     }
1868     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1869   }
1870 
1871   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1872   lrow = row - rstart;
1873 
1874   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1875   if (!v)   {pvA = 0; pvB = 0;}
1876   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1877   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   nztot = nzA + nzB;
1880 
1881   cmap = mat->garray;
1882   if (v  || idx) {
1883     if (nztot) {
1884       /* Sort by increasing column numbers, assuming A and B already sorted */
1885       PetscInt imark = -1;
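      /* imark will hold the number of off-diagonal (B) entries whose global column precedes
         the diagonal block, i.e., the split point for the three-way merge below */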
1886       if (v) {
1887         *v = v_p = mat->rowvalues;
1888         for (i=0; i<nzB; i++) {
1889           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1890           else break;
1891         }
1892         imark = i;
1893         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1894         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1895       }
1896       if (idx) {
1897         *idx = idx_p = mat->rowindices;
1898         if (imark > -1) {
1899           for (i=0; i<imark; i++) {
1900             idx_p[i] = cmap[cworkB[i]];
1901           }
1902         } else {
1903           for (i=0; i<nzB; i++) {
1904             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1905             else break;
1906           }
1907           imark = i;
1908         }
1909         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1910         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1911       }
1912     } else {
1913       if (idx) *idx = 0;
1914       if (v)   *v   = 0;
1915     }
1916   }
1917   *nz  = nztot;
1918   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1919   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1924 {
1925   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1926 
1927   PetscFunctionBegin;
1928   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1929   aij->getrowactive = PETSC_FALSE;
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1934 {
1935   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1936   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1937   PetscErrorCode ierr;
1938   PetscInt       i,j,cstart = mat->cmap->rstart;
1939   PetscReal      sum = 0.0;
1940   MatScalar      *v;
1941 
1942   PetscFunctionBegin;
1943   if (aij->size == 1) {
1944     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1945   } else {
1946     if (type == NORM_FROBENIUS) {
1947       v = amat->a;
1948       for (i=0; i<amat->nz; i++) {
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950       }
1951       v = bmat->a;
1952       for (i=0; i<bmat->nz; i++) {
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954       }
1955       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1956       *norm = PetscSqrtReal(*norm);
1957       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1958     } else if (type == NORM_1) { /* max column norm */
1959       PetscReal *tmp,*tmp2;
1960       PetscInt  *jj,*garray = aij->garray;
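      /* accumulate local column sums in a dense array indexed by global column (note the
         O(N) storage per process), reduce across processes, then take the maximum entry */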
1961       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1962       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1963       *norm = 0.0;
1964       v     = amat->a; jj = amat->j;
1965       for (j=0; j<amat->nz; j++) {
1966         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1967       }
1968       v = bmat->a; jj = bmat->j;
1969       for (j=0; j<bmat->nz; j++) {
1970         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1971       }
1972       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1973       for (j=0; j<mat->cmap->N; j++) {
1974         if (tmp2[j] > *norm) *norm = tmp2[j];
1975       }
1976       ierr = PetscFree(tmp);CHKERRQ(ierr);
1977       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1978       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1979     } else if (type == NORM_INFINITY) { /* max row norm */
1980       PetscReal ntemp = 0.0;
1981       for (j=0; j<aij->A->rmap->n; j++) {
1982         v   = amat->a + amat->i[j];
1983         sum = 0.0;
1984         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1985           sum += PetscAbsScalar(*v); v++;
1986         }
1987         v = bmat->a + bmat->i[j];
1988         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1989           sum += PetscAbsScalar(*v); v++;
1990         }
1991         if (sum > ntemp) ntemp = sum;
1992       }
1993       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1996   }
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2001 {
2002   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2003   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2004   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2005   PetscErrorCode ierr;
2006   Mat            B,A_diag,*B_diag;
2007   MatScalar      *array;
2008 
2009   PetscFunctionBegin;
2010   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2011   ai = Aloc->i; aj = Aloc->j;
2012   bi = Bloc->i; bj = Bloc->j;
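  /* an in-place transpose arrives with *matout == A; it builds a new matrix just like
     MAT_INITIAL_MATRIX and merges it back into A via MatHeaderMerge() at the end */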
2013   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2014     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2015     PetscSFNode          *oloc;
2016     PETSC_UNUSED PetscSF sf;
2017 
2018     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2019     /* compute d_nnz for preallocation */
2020     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++) {
2022       d_nnz[aj[i]]++;
2023     }
2024     /* compute local off-diagonal contributions */
2025     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2026     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2027     /* map those to global */
2028     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2029     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2030     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2031     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2032     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
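    /* o_nnz[c] now counts, for each locally owned column c of A, the off-diagonal entries
       of that column summed over all ranks; these are the off-diagonal row lengths of the
       transpose */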
2034     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2035 
2036     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2037     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2038     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2039     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2040     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2041     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2042   } else {
2043     B    = *matout;
2044     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2045   }
2046 
2047   b           = (Mat_MPIAIJ*)B->data;
2048   A_diag      = a->A;
2049   B_diag      = &b->A;
2050   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2051   A_diag_ncol = A_diag->cmap->N;
2052   B_diag_ilen = sub_B_diag->ilen;
2053   B_diag_i    = sub_B_diag->i;
2054 
2055   /* Set ilen for diagonal of B */
2056   for (i=0; i<A_diag_ncol; i++) {
2057     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2058   }
2059 
2060   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2061   very quickly (i.e., without using MatSetValues) because all writes are local. */
2062   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2063 
2064   /* copy over the B part */
2065   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2066   array = Bloc->a;
2067   row   = A->rmap->rstart;
2068   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2069   cols_tmp = cols;
2070   for (i=0; i<mb; i++) {
2071     ncol = bi[i+1]-bi[i];
2072     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2073     row++;
2074     array += ncol; cols_tmp += ncol;
2075   }
2076   ierr = PetscFree(cols);CHKERRQ(ierr);
2077 
2078   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2081     *matout = B;
2082   } else {
2083     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2089 {
2090   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2091   Mat            a    = aij->A,b = aij->B;
2092   PetscErrorCode ierr;
2093   PetscInt       s1,s2,s3;
2094 
2095   PetscFunctionBegin;
2096   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2097   if (rr) {
2098     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2099     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2100     /* Overlap communication with computation. */
2101     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102   }
2103   if (ll) {
2104     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2105     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2106     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2107   }
2108   /* scale the diagonal block */
2109   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2110 
2111   if (rr) {
2112     /* Do a scatter end and then right scale the off-diagonal block */
2113     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2115   }
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2120 {
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   PetscErrorCode ierr;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2130 {
2131   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2132   Mat            a,b,c,d;
2133   PetscBool      flg;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   a = matA->A; b = matA->B;
2138   c = matB->A; d = matB->B;
2139 
2140   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2141   if (flg) {
2142     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2143   }
2144   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2149 {
2150   PetscErrorCode ierr;
2151   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2152   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2153 
2154   PetscFunctionBegin;
2155   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2156   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2157     /* because of the column compression in the off-process part of the matrix a->B,
2158        the number of columns in a->B and b->B may differ, hence we cannot call
2159        MatCopy() directly on the two parts. If need be, a copy more efficient than
2160        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2161        and then copying the submatrices */
2162     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2163   } else {
2164     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2165     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2166   }
2167   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2172 {
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /*
2181    Computes the number of nonzeros per row needed for preallocation when X and Y
2182    have different nonzero structure.
2183 */
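/* Example: if a row of X has global columns {0,3,5} and the same row of Y has {1,3}, the
   merged pattern is {0,1,3,5}, so that row needs nnz = 4.  The sorted-merge count below
   relies on each row's column indices being sorted by global index. */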
2184 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2185 {
2186   PetscInt       i,j,k,nzx,nzy;
2187 
2188   PetscFunctionBegin;
2189   /* Set the number of nonzeros in the new matrix */
2190   for (i=0; i<m; i++) {
2191     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2192     nzx = xi[i+1] - xi[i];
2193     nzy = yi[i+1] - yi[i];
2194     nnz[i] = 0;
2195     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2196       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2197       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2198       nnz[i]++;
2199     }
2200     for (; k<nzy; k++) nnz[i]++;
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2206 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2207 {
2208   PetscErrorCode ierr;
2209   PetscInt       m = Y->rmap->N;
2210   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2211   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2212 
2213   PetscFunctionBegin;
2214   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2219 {
2220   PetscErrorCode ierr;
2221   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2222   PetscBLASInt   bnz,one=1;
2223   Mat_SeqAIJ     *x,*y;
2224 
2225   PetscFunctionBegin;
2226   if (str == SAME_NONZERO_PATTERN) {
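    /* identical nonzero patterns: the value arrays of X and Y are conformal, so Y += a*X
       reduces to one BLAS axpy over the diagonal-block values and one over the
       off-diagonal-block values */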
2227     PetscScalar alpha = a;
2228     x    = (Mat_SeqAIJ*)xx->A->data;
2229     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2230     y    = (Mat_SeqAIJ*)yy->A->data;
2231     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2232     x    = (Mat_SeqAIJ*)xx->B->data;
2233     y    = (Mat_SeqAIJ*)yy->B->data;
2234     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2235     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2236     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2237   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2238     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2239   } else {
2240     Mat      B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2248     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2251     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2252     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2253     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2261 
2262 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2263 {
2264 #if defined(PETSC_USE_COMPLEX)
2265   PetscErrorCode ierr;
2266   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2270   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2271 #else
2272   PetscFunctionBegin;
2273 #endif
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2278 {
2279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2284   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2295   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303   PetscInt       i,*idxb = 0;
2304   PetscScalar    *va,*vb;
2305   Vec            vtmp;
2306 
2307   PetscFunctionBegin;
2308   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2309   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2310   if (idx) {
2311     for (i=0; i<A->rmap->n; i++) {
2312       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2313     }
2314   }
2315 
2316   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2317   if (idx) {
2318     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2319   }
2320   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2321   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2322 
2323   for (i=0; i<A->rmap->n; i++) {
2324     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2325       va[i] = vb[i];
2326       if (idx) idx[i] = a->garray[idxb[i]];
2327     }
2328   }
2329 
2330   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2331   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2332   ierr = PetscFree(idxb);CHKERRQ(ierr);
2333   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338 {
2339   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,*idxb = 0;
2342   PetscScalar    *va,*vb;
2343   Vec            vtmp;
2344 
2345   PetscFunctionBegin;
2346   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2347   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2348   if (idx) {
2349     for (i=0; i<A->rmap->n; i++) {
2350       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2351     }
2352   }
2353 
2354   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2355   if (idx) {
2356     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2357   }
2358   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2359   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2360 
2361   for (i=0; i<A->rmap->n; i++) {
2362     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2363       va[i] = vb[i];
2364       if (idx) idx[i] = a->garray[idxb[i]];
2365     }
2366   }
2367 
2368   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2369   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2370   ierr = PetscFree(idxb);CHKERRQ(ierr);
2371   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2378   PetscInt       n      = A->rmap->n;
2379   PetscInt       cstart = A->cmap->rstart;
2380   PetscInt       *cmap  = mat->garray;
2381   PetscInt       *diagIdx, *offdiagIdx;
2382   Vec            diagV, offdiagV;
2383   PetscScalar    *a, *diagA, *offdiagA;
2384   PetscInt       r;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2394   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2395   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < n; ++r) {
2397     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2398       a[r]   = diagA[r];
2399       idx[r] = cstart + diagIdx[r];
2400     } else {
2401       a[r]   = offdiagA[r];
2402       idx[r] = cmap[offdiagIdx[r]];
2403     }
2404   }
2405   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2415 {
2416   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2417   PetscInt       n      = A->rmap->n;
2418   PetscInt       cstart = A->cmap->rstart;
2419   PetscInt       *cmap  = mat->garray;
2420   PetscInt       *diagIdx, *offdiagIdx;
2421   Vec            diagV, offdiagV;
2422   PetscScalar    *a, *diagA, *offdiagA;
2423   PetscInt       r;
2424   PetscErrorCode ierr;
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2429   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2431   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2432   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2433   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2434   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2435   for (r = 0; r < n; ++r) {
2436     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2437       a[r]   = diagA[r];
2438       idx[r] = cstart + diagIdx[r];
2439     } else {
2440       a[r]   = offdiagA[r];
2441       idx[r] = cmap[offdiagIdx[r]];
2442     }
2443   }
2444   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2446   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2449   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2454 {
2455   PetscErrorCode ierr;
2456   Mat            *dummy;
2457 
2458   PetscFunctionBegin;
2459   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2460   *newmat = *dummy;
2461   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2466 {
2467   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2468   PetscErrorCode ierr;
2469 
2470   PetscFunctionBegin;
2471   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2472   A->factorerrortype = a->A->factorerrortype;
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2483   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2484   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2490 {
2491   PetscFunctionBegin;
2492   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2493   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 /*@
2498    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2499 
2500    Collective on Mat
2501 
2502    Input Parameters:
2503 +    A - the matrix
2504 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2505 
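   Options Database Keys:
.    -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap
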
2506  Level: advanced
2507 
2508 @*/
2509 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2510 {
2511   PetscErrorCode       ierr;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2519 {
2520   PetscErrorCode       ierr;
2521   PetscBool            sc = PETSC_FALSE,flg;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2525   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2526   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2527   if (flg) {
2528     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2529   }
2530   ierr = PetscOptionsTail();CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2535 {
2536   PetscErrorCode ierr;
2537   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2538   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2539 
2540   PetscFunctionBegin;
2541   if (!Y->preallocated) {
2542     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2543   } else if (!aij->nz) {
2544     PetscInt nonew = aij->nonew;
2545     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2546     aij->nonew = nonew;
2547   }
2548   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
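
/*
   MatShift() computes Y <- Y + a*I; the code above merely guarantees that the
   diagonal is allocated first. A one-line sketch (assumes Y is a square MATMPIAIJ):

     ierr = MatShift(Y,2.0);CHKERRQ(ierr);
*/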
2551 
2552 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2553 {
2554   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2555   PetscErrorCode ierr;
2556 
2557   PetscFunctionBegin;
2558   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2559   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2560   if (d) {
2561     PetscInt rstart;
2562     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2563     *d += rstart;
2564 
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2570 {
2571   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2572   PetscErrorCode ierr;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 /* -------------------------------------------------------------------*/
2580 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2581                                        MatGetRow_MPIAIJ,
2582                                        MatRestoreRow_MPIAIJ,
2583                                        MatMult_MPIAIJ,
2584                                 /* 4*/ MatMultAdd_MPIAIJ,
2585                                        MatMultTranspose_MPIAIJ,
2586                                        MatMultTransposeAdd_MPIAIJ,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                 /*10*/ 0,
2591                                        0,
2592                                        0,
2593                                        MatSOR_MPIAIJ,
2594                                        MatTranspose_MPIAIJ,
2595                                 /*15*/ MatGetInfo_MPIAIJ,
2596                                        MatEqual_MPIAIJ,
2597                                        MatGetDiagonal_MPIAIJ,
2598                                        MatDiagonalScale_MPIAIJ,
2599                                        MatNorm_MPIAIJ,
2600                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2601                                        MatAssemblyEnd_MPIAIJ,
2602                                        MatSetOption_MPIAIJ,
2603                                        MatZeroEntries_MPIAIJ,
2604                                 /*24*/ MatZeroRows_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                 /*29*/ MatSetUp_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        MatGetDiagonalBlock_MPIAIJ,
2613                                        0,
2614                                 /*34*/ MatDuplicate_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*39*/ MatAXPY_MPIAIJ,
2620                                        MatCreateSubMatrices_MPIAIJ,
2621                                        MatIncreaseOverlap_MPIAIJ,
2622                                        MatGetValues_MPIAIJ,
2623                                        MatCopy_MPIAIJ,
2624                                 /*44*/ MatGetRowMax_MPIAIJ,
2625                                        MatScale_MPIAIJ,
2626                                        MatShift_MPIAIJ,
2627                                        MatDiagonalSet_MPIAIJ,
2628                                        MatZeroRowsColumns_MPIAIJ,
2629                                 /*49*/ MatSetRandom_MPIAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2635                                        0,
2636                                        MatSetUnfactored_MPIAIJ,
2637                                        MatPermute_MPIAIJ,
2638                                        0,
2639                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2640                                        MatDestroy_MPIAIJ,
2641                                        MatView_MPIAIJ,
2642                                        0,
2643                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2644                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2650                                        MatGetRowMinAbs_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*75*/ MatFDColoringApply_AIJ,
2656                                        MatSetFromOptions_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        MatFindZeroDiagonals_MPIAIJ,
2660                                 /*80*/ 0,
2661                                        0,
2662                                        0,
2663                                 /*83*/ MatLoad_MPIAIJ,
2664                                        MatIsSymmetric_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2670                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2671                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2672                                        MatPtAP_MPIAIJ_MPIAIJ,
2673                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2674                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                 /*99*/ 0,
2680                                        0,
2681                                        0,
2682                                        MatConjugate_MPIAIJ,
2683                                        0,
2684                                 /*104*/MatSetValuesRow_MPIAIJ,
2685                                        MatRealPart_MPIAIJ,
2686                                        MatImaginaryPart_MPIAIJ,
2687                                        0,
2688                                        0,
2689                                 /*109*/0,
2690                                        0,
2691                                        MatGetRowMin_MPIAIJ,
2692                                        0,
2693                                        MatMissingDiagonal_MPIAIJ,
2694                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2695                                        0,
2696                                        MatGetGhosts_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                 /*119*/0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                        MatGetMultiProcBlock_MPIAIJ,
2704                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2705                                        MatGetColumnNorms_MPIAIJ,
2706                                        MatInvertBlockDiagonal_MPIAIJ,
2707                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2708                                        MatCreateSubMatricesMPI_MPIAIJ,
2709                                 /*129*/0,
2710                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2711                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2712                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2713                                        0,
2714                                 /*134*/0,
2715                                        0,
2716                                        MatRARt_MPIAIJ_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                 /*139*/MatSetBlockSizes_MPIAIJ,
2720                                        0,
2721                                        0,
2722                                        MatFDColoringSetUp_MPIXAIJ,
2723                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2724                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2725 };
2726 
2727 /* ----------------------------------------------------------------------------------------*/
2728 
2729 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2741 {
2742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2747   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
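
/*
   Typical store/retrieve cycle (a sketch; MatStoreValues() requires that the
   nonzero structure not change between the two calls, hence the option below):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     .... overwrite values with the same sparsity pattern, e.g. for a new linearization ....
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/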
2750 
2751 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2752 {
2753   Mat_MPIAIJ     *b;
2754   PetscErrorCode ierr;
2755   PetscMPIInt    size;
2756 
2757   PetscFunctionBegin;
2758   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2759   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2760   b = (Mat_MPIAIJ*)B->data;
2761 
2762 #if defined(PETSC_USE_CTABLE)
2763   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2764 #else
2765   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2766 #endif
2767   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2768   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2769   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2770 
2771   /* Because B will have been resized we simply destroy it and create a new one each time */
2772   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2773   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2774   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2775   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2776   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2777   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2778   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2779 
2780   if (!B->preallocated) {
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2783     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2784     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2785     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2786   }
2787 
2788   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2789   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2790   B->preallocated  = PETSC_TRUE;
2791   B->was_assembled = PETSC_FALSE;
2792   B->assembled     = PETSC_FALSE;
2793   PetscFunctionReturn(0);
2794 }
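
/*
   A preallocation sketch from the user's side (illustrative numbers only: at most
   5 nonzeros per row in the diagonal block and 2 in the off-diagonal block; m is
   an assumed local size):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/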
2795 
2796 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2797 {
2798   Mat_MPIAIJ     *b;
2799   PetscErrorCode ierr;
2800 
2801   PetscFunctionBegin;
2802   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2803   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2805   b = (Mat_MPIAIJ*)B->data;
2806 
2807 #if defined(PETSC_USE_CTABLE)
2808   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2809 #else
2810   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2811 #endif
2812   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2813   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2814   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2815 
2816   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2817   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2818   B->preallocated  = PETSC_TRUE;
2819   B->was_assembled = PETSC_FALSE;
2820   B->assembled = PETSC_FALSE;
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2825 {
2826   Mat            mat;
2827   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2828   PetscErrorCode ierr;
2829 
2830   PetscFunctionBegin;
2831   *newmat = 0;
2832   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2833   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2834   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2835   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2836   a       = (Mat_MPIAIJ*)mat->data;
2837 
2838   mat->factortype   = matin->factortype;
2839   mat->assembled    = PETSC_TRUE;
2840   mat->insertmode   = NOT_SET_VALUES;
2841   mat->preallocated = PETSC_TRUE;
2842 
2843   a->size         = oldmat->size;
2844   a->rank         = oldmat->rank;
2845   a->donotstash   = oldmat->donotstash;
2846   a->roworiented  = oldmat->roworiented;
2847   a->rowindices   = 0;
2848   a->rowvalues    = 0;
2849   a->getrowactive = PETSC_FALSE;
2850 
2851   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2852   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2853 
2854   if (oldmat->colmap) {
2855 #if defined(PETSC_USE_CTABLE)
2856     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2857 #else
2858     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2859     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2860     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2861 #endif
2862   } else a->colmap = 0;
2863   if (oldmat->garray) {
2864     PetscInt len;
2865     len  = oldmat->B->cmap->n;
2866     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2867     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2868     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2869   } else a->garray = 0;
2870 
2871   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2872   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2873   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2874   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2875 
2876   if (oldmat->Mvctx_mpi1) {
2877     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2878     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2879   }
2880 
2881   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2883   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2884   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2885   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2886   *newmat = mat;
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2891 {
2892   PetscBool      isbinary, ishdf5;
2893   PetscErrorCode ierr;
2894 
2895   PetscFunctionBegin;
2896   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2897   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2898   /* force binary viewer to load .info file if it has not yet done so */
2899   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2900   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2901   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2902   if (isbinary) {
2903     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2904   } else if (ishdf5) {
2905 #if defined(PETSC_HAVE_HDF5)
2906     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2907 #else
2908     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2909 #endif
2910   } else {
2911     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2912   }
2913   PetscFunctionReturn(0);
2914 }
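
/*
   A loading sketch (assumes "matrix.dat" was previously written with MatView()
   on a binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/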
2915 
2916 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2917 {
2918   PetscScalar    *vals,*svals;
2919   MPI_Comm       comm;
2920   PetscErrorCode ierr;
2921   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2922   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2923   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2924   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2925   PetscInt       cend,cstart,n,*rowners;
2926   int            fd;
2927   PetscInt       bs = newMat->rmap->bs;
2928 
2929   PetscFunctionBegin;
2930   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2931   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2932   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2933   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2934   if (!rank) {
2935     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2936     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2937     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2938   }
2939 
2940   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2941   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2942   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2943   if (bs < 0) bs = 1;
2944 
2945   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2946   M    = header[1]; N = header[2];
2947 
2948   /* If global sizes are set, check if they are consistent with that given in the file */
2949   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2950   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2951 
2952   /* determine ownership of all (block) rows */
2953   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2954   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2955   else m = newMat->rmap->n; /* Set by user */
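  /* Worked example of the PETSC_DECIDE split above: M=10, bs=1, size=3 gives
     M/bs=10 block rows, 10/3=3 with remainder 1, so m is 4,3,3 on ranks 0,1,2 */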
2956 
2957   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2958   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2959 
2960   /* First process needs enough room for the process with the most rows */
2961   if (!rank) {
2962     mmax = rowners[1];
2963     for (i=2; i<=size; i++) {
2964       mmax = PetscMax(mmax, rowners[i]);
2965     }
2966   } else mmax = -1;             /* unused, but compilers complain */
2967 
2968   rowners[0] = 0;
2969   for (i=2; i<=size; i++) {
2970     rowners[i] += rowners[i-1];
2971   }
2972   rstart = rowners[rank];
2973   rend   = rowners[rank+1];
2974 
2975   /* distribute row lengths to all processors */
2976   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2977   if (!rank) {
2978     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2979     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2980     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2981     for (j=0; j<m; j++) {
2982       procsnz[0] += ourlens[j];
2983     }
2984     for (i=1; i<size; i++) {
2985       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2986       /* calculate the number of nonzeros on each processor */
2987       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2988         procsnz[i] += rowlengths[j];
2989       }
2990       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2991     }
2992     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2993   } else {
2994     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2995   }
2996 
2997   if (!rank) {
2998     /* determine max buffer needed and allocate it */
2999     maxnz = 0;
3000     for (i=0; i<size; i++) {
3001       maxnz = PetscMax(maxnz,procsnz[i]);
3002     }
3003     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3004 
3005     /* read in my part of the matrix column indices  */
3006     nz   = procsnz[0];
3007     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3008     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3009 
3010     /* read in everyone else's part and ship it off */
3011     for (i=1; i<size; i++) {
3012       nz   = procsnz[i];
3013       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3014       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3015     }
3016     ierr = PetscFree(cols);CHKERRQ(ierr);
3017   } else {
3018     /* determine buffer space needed for message */
3019     nz = 0;
3020     for (i=0; i<m; i++) {
3021       nz += ourlens[i];
3022     }
3023     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3024 
3025     /* receive message of column indices */
3026     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3027   }
3028 
3029   /* determine column ownership if matrix is not square */
3030   if (N != M) {
3031     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3032     else n = newMat->cmap->n;
3033     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3034     cstart = cend - n;
3035   } else {
3036     cstart = rstart;
3037     cend   = rend;
3038     n      = cend - cstart;
3039   }
3040 
3041   /* loop over local rows, determining number of off-diagonal entries */
3042   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3043   jj   = 0;
3044   for (i=0; i<m; i++) {
3045     for (j=0; j<ourlens[i]; j++) {
3046       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3047       jj++;
3048     }
3049   }
3050 
3051   for (i=0; i<m; i++) {
3052     ourlens[i] -= offlens[i];
3053   }
3054   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3055 
3056   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3057 
3058   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3059 
3060   for (i=0; i<m; i++) {
3061     ourlens[i] += offlens[i];
3062   }
3063 
3064   if (!rank) {
3065     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3066 
3067     /* read in my part of the matrix numerical values  */
3068     nz   = procsnz[0];
3069     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3070 
3071     /* insert into matrix */
3072     jj      = rstart;
3073     smycols = mycols;
3074     svals   = vals;
3075     for (i=0; i<m; i++) {
3076       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3077       smycols += ourlens[i];
3078       svals   += ourlens[i];
3079       jj++;
3080     }
3081 
3082     /* read in other processors and ship out */
3083     for (i=1; i<size; i++) {
3084       nz   = procsnz[i];
3085       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3086       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3087     }
3088     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3089   } else {
3090     /* receive numeric values */
3091     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3092 
3093     /* receive message of values */
3094     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3095 
3096     /* insert into matrix */
3097     jj      = rstart;
3098     smycols = mycols;
3099     svals   = vals;
3100     for (i=0; i<m; i++) {
3101       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3102       smycols += ourlens[i];
3103       svals   += ourlens[i];
3104       jj++;
3105     }
3106   }
3107   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3108   ierr = PetscFree(vals);CHKERRQ(ierr);
3109   ierr = PetscFree(mycols);CHKERRQ(ierr);
3110   ierr = PetscFree(rowners);CHKERRQ(ierr);
3111   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 /* Not scalable because of ISAllGather() unless getting all columns. */
3117 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3118 {
3119   PetscErrorCode ierr;
3120   IS             iscol_local;
3121   PetscBool      isstride;
3122   PetscMPIInt    lisstride=0,gisstride;
3123 
3124   PetscFunctionBegin;
3125   /* check if we are grabbing all columns */
3126   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3127 
3128   if (isstride) {
3129     PetscInt  start,len,mstart,mlen;
3130     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3131     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3132     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3133     if (mstart == start && mlen-mstart == len) lisstride = 1;
3134   }
3135 
3136   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3137   if (gisstride) {
3138     PetscInt N;
3139     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3140     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3141     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3142     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3143   } else {
3144     PetscInt cbs;
3145     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3146     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3147     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3148   }
3149 
3150   *isseq = iscol_local;
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 /*
3155  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3156  (see MatCreateSubMatrix_MPIAIJ_nonscalable())
3157 
3158  Input Parameters:
3159    mat - matrix
3160    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3161            i.e., mat->rstart <= isrow[i] < mat->rend
3162    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3163            i.e., mat->cstart <= iscol[i] < mat->cend
3164  Output Parameters:
3165    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3166    iscol_o - sequential column index set for retrieving mat->B
3167    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3168  */
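/*
  Worked example (illustrative, 2 processes): mat owns columns [0,4) on rank 0 and
  [4,8) on rank 1, rank 0's off-diagonal part B references global columns {5,7},
  and iscol holds {1,2} on rank 0 and {5,6} on rank 1. Then on rank 0 this routine
  returns iscol_d = {1,2} (local column indices into mat->A), iscol_o = {0} (column
  5 is the 0th column of B), and garray = {2} since column 5 sits at position 2 of
  the concatenated iscol {1,2,5,6}.
*/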
3169 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3170 {
3171   PetscErrorCode ierr;
3172   Vec            x,cmap;
3173   const PetscInt *is_idx;
3174   PetscScalar    *xarray,*cmaparray;
3175   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3176   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3177   Mat            B=a->B;
3178   Vec            lvec=a->lvec,lcmap;
3179   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3180   MPI_Comm       comm;
3181   VecScatter     Mvctx=a->Mvctx;
3182 
3183   PetscFunctionBegin;
3184   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3185   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3186 
3187   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3188   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3189   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3190   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3191   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3192 
3193   /* Get start indices */
3194   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3195   isstart -= ncols;
3196   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3197 
3198   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3199   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3200   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3201   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3202   for (i=0; i<ncols; i++) {
3203     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3204     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3205     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3206   }
3207   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3208   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3209   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3210 
3211   /* Get iscol_d */
3212   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3213   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3214   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3215 
3216   /* Get isrow_d */
3217   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3218   rstart = mat->rmap->rstart;
3219   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3220   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3221   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3222   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3223 
3224   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3225   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3226   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3227 
3228   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3229   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3230   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3231 
3232   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3233 
3234   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236 
3237   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3238   /* off-process column indices */
3239   count = 0;
3240   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3241   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3242 
3243   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3244   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3245   for (i=0; i<Bn; i++) {
3246     if (PetscRealPart(xarray[i]) > -1.0) {
3247       idx[count]     = i;                   /* local column index in off-diagonal part B */
3248       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3249       count++;
3250     }
3251   }
3252   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3253   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3254 
3255   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3256   /* cannot ensure iscol_o has same blocksize as iscol! */
3257 
3258   ierr = PetscFree(idx);CHKERRQ(ierr);
3259   *garray = cmap1;
3260 
3261   ierr = VecDestroy(&x);CHKERRQ(ierr);
3262   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3263   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3268 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3269 {
3270   PetscErrorCode ierr;
3271   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3272   Mat            M = NULL;
3273   MPI_Comm       comm;
3274   IS             iscol_d,isrow_d,iscol_o;
3275   Mat            Asub = NULL,Bsub = NULL;
3276   PetscInt       n;
3277 
3278   PetscFunctionBegin;
3279   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3280 
3281   if (call == MAT_REUSE_MATRIX) {
3282     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3283     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3284     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3285 
3286     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3287     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3288 
3289     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3290     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3291 
3292     /* Update diagonal and off-diagonal portions of submat */
3293     asub = (Mat_MPIAIJ*)(*submat)->data;
3294     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3295     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3296     if (n) {
3297       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3298     }
3299     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3300     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301 
3302   } else { /* call == MAT_INITIAL_MATRIX */
3303     const PetscInt *garray;
3304     PetscInt        BsubN;
3305 
3306     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3307     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3308 
3309     /* Create local submatrices Asub and Bsub */
3310     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3311     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3312 
3313     /* Create submatrix M */
3314     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3315 
3316     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3317     asub = (Mat_MPIAIJ*)M->data;
3318 
3319     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3320     n = asub->B->cmap->N;
3321     if (BsubN > n) {
3322       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3323       const PetscInt *idx;
3324       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3325       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3326 
3327       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3328       j = 0;
3329       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3330       for (i=0; i<n; i++) {
3331         if (j >= BsubN) break;
3332         while (subgarray[i] > garray[j]) j++;
3333 
3334         if (subgarray[i] == garray[j]) {
3335           idx_new[i] = idx[j++];
3336         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3337       }
3338       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3339 
3340       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3341       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3342 
3343     } else if (BsubN < n) {
3344       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3345     }
3346 
3347     ierr = PetscFree(garray);CHKERRQ(ierr);
3348     *submat = M;
3349 
3350     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3351     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3352     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3353 
3354     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3355     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3356 
3357     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3358     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3359   }
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3364 {
3365   PetscErrorCode ierr;
3366   IS             iscol_local=NULL,isrow_d;
3367   PetscInt       csize;
3368   PetscInt       n,i,j,start,end;
3369   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3370   MPI_Comm       comm;
3371 
3372   PetscFunctionBegin;
3373   /* If isrow has same processor distribution as mat,
3374      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3375   if (call == MAT_REUSE_MATRIX) {
3376     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3377     if (isrow_d) {
3378       sameRowDist  = PETSC_TRUE;
3379       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3380     } else {
3381       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3382       if (iscol_local) {
3383         sameRowDist  = PETSC_TRUE;
3384         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3385       }
3386     }
3387   } else {
3388     /* Check if isrow has same processor distribution as mat */
3389     sameDist[0] = PETSC_FALSE;
3390     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3391     if (!n) {
3392       sameDist[0] = PETSC_TRUE;
3393     } else {
3394       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3395       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3396       if (i >= start && j < end) {
3397         sameDist[0] = PETSC_TRUE;
3398       }
3399     }
3400 
3401     /* Check if iscol has same processor distribution as mat */
3402     sameDist[1] = PETSC_FALSE;
3403     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3404     if (!n) {
3405       sameDist[1] = PETSC_TRUE;
3406     } else {
3407       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3408       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3409       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3410     }
3411 
3412     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3413     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3414     sameRowDist = tsameDist[0];
3415   }
3416 
3417   if (sameRowDist) {
3418     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3419       /* isrow and iscol have same processor distribution as mat */
3420       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3421       PetscFunctionReturn(0);
3422     } else { /* sameRowDist */
3423       /* isrow has same processor distribution as mat */
3424       if (call == MAT_INITIAL_MATRIX) {
3425         PetscBool sorted;
3426         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3427         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3428         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3429         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3430 
3431         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3432         if (sorted) {
3433           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3434           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3435           PetscFunctionReturn(0);
3436         }
3437       } else { /* call == MAT_REUSE_MATRIX */
3438         IS    iscol_sub;
3439         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3440         if (iscol_sub) {
3441           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3442           PetscFunctionReturn(0);
3443         }
3444       }
3445     }
3446   }
3447 
3448   /* General case: iscol -> iscol_local which has global size of iscol */
3449   if (call == MAT_REUSE_MATRIX) {
3450     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3451     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3452   } else {
3453     if (!iscol_local) {
3454       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3455     }
3456   }
3457 
3458   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3459   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3460 
3461   if (call == MAT_INITIAL_MATRIX) {
3462     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3463     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3464   }
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 /*@C
3469      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3470          and "off-diagonal" part of the matrix in CSR format.
3471 
3472    Collective on MPI_Comm
3473 
3474    Input Parameters:
3475 +  comm - MPI communicator
3476 .  A - "diagonal" portion of matrix
3477 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3478 -  garray - global index of B columns
3479 
3480    Output Parameter:
3481 .  mat - the matrix, with input A as its local diagonal matrix

3482    Level: advanced
3483 
3484    Notes:
3485        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3486        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3487 
3488 .seealso: MatCreateMPIAIJWithSplitArrays()
3489 @*/
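/*
   A construction sketch (hedged: Aloc, Bloc and g are hypothetical caller-side
   names; Aloc and Bloc are SEQAIJ matrices built on each process, Bloc's columns
   are numbered locally and g[] maps them to global column indices; both matrices
   are consumed by the call, as described above):

     Mat A;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,g,&A);CHKERRQ(ierr);
*/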
3490 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3491 {
3492   PetscErrorCode ierr;
3493   Mat_MPIAIJ     *maij;
3494   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3495   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3496   PetscScalar    *oa=b->a;
3497   Mat            Bnew;
3498   PetscInt       m,n,N;
3499 
3500   PetscFunctionBegin;
3501   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3502   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3503   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3504   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3505   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3506   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3507 
3508   /* Get global columns of mat */
3509   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3510 
3511   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3512   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3513   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3514   maij = (Mat_MPIAIJ*)(*mat)->data;
3515 
3516   (*mat)->preallocated = PETSC_TRUE;
3517 
3518   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3519   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3520 
3521   /* Set A as diagonal portion of *mat */
3522   maij->A = A;
3523 
3524   nz = oi[m];
3525   for (i=0; i<nz; i++) {
3526     col   = oj[i];
3527     oj[i] = garray[col];
3528   }
3529 
3530   /* Set Bnew as off-diagonal portion of *mat */
3531   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3532   bnew        = (Mat_SeqAIJ*)Bnew->data;
3533   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3534   maij->B     = Bnew;
3535 
3536   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3537 
3538   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3539   b->free_a       = PETSC_FALSE;
3540   b->free_ij      = PETSC_FALSE;
3541   ierr = MatDestroy(&B);CHKERRQ(ierr);
3542 
3543   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3544   bnew->free_a       = PETSC_TRUE;
3545   bnew->free_ij      = PETSC_TRUE;
3546 
3547   /* condense columns of maij->B */
3548   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3549   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3550   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3551   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3552   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3557 
3558 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3559 {
3560   PetscErrorCode ierr;
3561   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3562   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3563   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3564   Mat            M,Msub,B=a->B;
3565   MatScalar      *aa;
3566   Mat_SeqAIJ     *aij;
3567   PetscInt       *garray = a->garray,*colsub,Ncols;
3568   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3569   IS             iscol_sub,iscmap;
3570   const PetscInt *is_idx,*cmap;
3571   PetscBool      allcolumns=PETSC_FALSE;
3572   MPI_Comm       comm;
3573 
3574   PetscFunctionBegin;
3575   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3576 
3577   if (call == MAT_REUSE_MATRIX) {
3578     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3579     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3580     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3581 
3582     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3583     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3584 
3585     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3586     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3587 
3588     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3589 
3590   } else { /* call == MAT_INITIAL_MATRIX */
3591     PetscBool flg;
3592 
3593     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3594     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3595 
3596     /* (1) iscol -> nonscalable iscol_local */
3597     /* Check for special case: each processor gets entire matrix columns */
3598     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3599     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3600     if (allcolumns) {
3601       iscol_sub = iscol_local;
3602       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3603       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3604 
3605     } else {
3606       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3607       PetscInt *idx,*cmap1,k;
3608       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3609       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3610       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3611       count = 0;
3612       k     = 0;
3613       for (i=0; i<Ncols; i++) {
3614         j = is_idx[i];
3615         if (j >= cstart && j < cend) {
3616           /* diagonal part of mat */
3617           idx[count]     = j;
3618           cmap1[count++] = i; /* column index in submat */
3619         } else if (Bn) {
3620           /* off-diagonal part of mat */
3621           if (j == garray[k]) {
3622             idx[count]     = j;
3623             cmap1[count++] = i;  /* column index in submat */
3624           } else if (j > garray[k]) {
3625             while (j > garray[k] && k < Bn-1) k++;
3626             if (j == garray[k]) {
3627               idx[count]     = j;
3628               cmap1[count++] = i; /* column index in submat */
3629             }
3630           }
3631         }
3632       }
3633       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3634 
3635       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3636       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3637       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3638 
3639       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3640     }
3641 
3642     /* (3) Create sequential Msub */
3643     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3644   }
3645 
3646   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3647   aij  = (Mat_SeqAIJ*)(Msub)->data;
3648   ii   = aij->i;
3649   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3650 
3651   /*
3652       m - number of local rows
3653       Ncols - number of columns (same on all processors)
3654       rstart - first row in new global matrix generated
3655   */
3656   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3657 
3658   if (call == MAT_INITIAL_MATRIX) {
3659     /* (4) Create parallel newmat */
3660     PetscMPIInt    rank,size;
3661     PetscInt       csize;
3662 
3663     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3664     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3665 
3666     /*
3667         Determine the number of non-zeros in the diagonal and off-diagonal
3668         portions of the matrix in order to do correct preallocation
3669     */
3670 
3671     /* first get start and end of "diagonal" columns */
3672     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3673     if (csize == PETSC_DECIDE) {
3674       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3675       if (mglobal == Ncols) { /* square matrix */
3676         nlocal = m;
3677       } else {
3678         nlocal = Ncols/size + ((Ncols % size) > rank);
3679       }
3680     } else {
3681       nlocal = csize;
3682     }
3683     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3684     rstart = rend - nlocal;
3685     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3686 
3687     /* next, compute all the lengths */
3688     jj    = aij->j;
3689     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3690     olens = dlens + m;
3691     for (i=0; i<m; i++) {
3692       jend = ii[i+1] - ii[i];
3693       olen = 0;
3694       dlen = 0;
3695       for (j=0; j<jend; j++) {
3696         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3697         else dlen++;
3698         jj++;
3699       }
3700       olens[i] = olen;
3701       dlens[i] = dlen;
3702     }
3703 
3704     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3705     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3706 
3707     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3708     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3709     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3710     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3711     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3712     ierr = PetscFree(dlens);CHKERRQ(ierr);
3713 
3714   } else { /* call == MAT_REUSE_MATRIX */
3715     M    = *newmat;
3716     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3717     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3718     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3719     /*
3720          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3721        rather than the slower MatSetValues().
3722     */
3723     M->was_assembled = PETSC_TRUE;
3724     M->assembled     = PETSC_FALSE;
3725   }
3726 
3727   /* (5) Set values of Msub to *newmat */
3728   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3729   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3730 
3731   jj   = aij->j;
3732   aa   = aij->a;
3733   for (i=0; i<m; i++) {
3734     row = rstart + i;
3735     nz  = ii[i+1] - ii[i];
3736     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3737     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3738     jj += nz; aa += nz;
3739   }
3740   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3741 
3742   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744 
3745   ierr = PetscFree(colsub);CHKERRQ(ierr);
3746 
3747   /* save Msub, iscol_sub and iscmap used in processor for next request */
3748   if (call ==  MAT_INITIAL_MATRIX) {
3749     *newmat = M;
3750     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3751     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3752 
3753     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3754     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3755 
3756     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3757     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3758 
3759     if (iscol_local) {
3760       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3761       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3762     }
3763   }
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 /*
3768     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3769   process, then the end result formed by concatenating the local matrices.
3770   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3771 
3772   Note: This requires a sequential iscol with all indices.
3773 */
3774 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3775 {
3776   PetscErrorCode ierr;
3777   PetscMPIInt    rank,size;
3778   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3779   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3780   Mat            M,Mreuse;
3781   MatScalar      *aa,*vwork;
3782   MPI_Comm       comm;
3783   Mat_SeqAIJ     *aij;
3784   PetscBool      colflag,allcolumns=PETSC_FALSE;
3785 
3786   PetscFunctionBegin;
3787   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3788   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3789   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3790 
3791   /* Check for special case: each processor gets entire matrix columns */
3792   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3793   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3794   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3795 
3796   if (call ==  MAT_REUSE_MATRIX) {
3797     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3798     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3799     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3800   } else {
3801     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3802   }
3803 
3804   /*
3805       m - number of local rows
3806       n - number of columns (same on all processors)
3807       rstart - first row in new global matrix generated
3808   */
3809   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3810   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3811   if (call == MAT_INITIAL_MATRIX) {
3812     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3813     ii  = aij->i;
3814     jj  = aij->j;
3815 
3816     /*
3817         Determine the number of non-zeros in the diagonal and off-diagonal
3818         portions of the matrix in order to do correct preallocation
3819     */
3820 
3821     /* first get start and end of "diagonal" columns */
3822     if (csize == PETSC_DECIDE) {
3823       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3824       if (mglobal == n) { /* square matrix */
3825         nlocal = m;
3826       } else {
3827         nlocal = n/size + ((n % size) > rank);
3828       }
3829     } else {
3830       nlocal = csize;
3831     }
3832     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3833     rstart = rend - nlocal;
3834     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3835 
3836     /* next, compute all the lengths */
3837     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3838     olens = dlens + m;
3839     for (i=0; i<m; i++) {
3840       jend = ii[i+1] - ii[i];
3841       olen = 0;
3842       dlen = 0;
3843       for (j=0; j<jend; j++) {
3844         if (*jj < rstart || *jj >= rend) olen++;
3845         else dlen++;
3846         jj++;
3847       }
3848       olens[i] = olen;
3849       dlens[i] = dlen;
3850     }
3851     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3852     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3853     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3854     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3855     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3856     ierr = PetscFree(dlens);CHKERRQ(ierr);
3857   } else {
3858     PetscInt ml,nl;
3859 
3860     M    = *newmat;
3861     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3862     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3863     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3864     /*
3865          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3866        rather than the slower MatSetValues().
3867     */
3868     M->was_assembled = PETSC_TRUE;
3869     M->assembled     = PETSC_FALSE;
3870   }
3871   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3872   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3873   ii   = aij->i;
3874   jj   = aij->j;
3875   aa   = aij->a;
3876   for (i=0; i<m; i++) {
3877     row   = rstart + i;
3878     nz    = ii[i+1] - ii[i];
3879     cwork = jj;     jj += nz;
3880     vwork = aa;     aa += nz;
3881     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3882   }
3883 
3884   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   *newmat = M;
3887 
3888   /* save submatrix used in processor for next request */
3889   if (call ==  MAT_INITIAL_MATRIX) {
3890     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3891     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3892   }
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3897 {
3898   PetscInt       m,cstart, cend,j,nnz,i,d;
3899   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3900   const PetscInt *JJ;
3901   PetscScalar    *values;
3902   PetscErrorCode ierr;
3903   PetscBool      nooffprocentries;
3904 
3905   PetscFunctionBegin;
3906   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3907 
3908   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3909   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3910   m      = B->rmap->n;
3911   cstart = B->cmap->rstart;
3912   cend   = B->cmap->rend;
3913   rstart = B->rmap->rstart;
3914 
3915   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3916 
3917 #if defined(PETSC_USE_DEBUG)
3918   for (i=0; i<m && Ii; i++) {
3919     nnz = Ii[i+1]- Ii[i];
3920     JJ  = J + Ii[i];
3921     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3922     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3923     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3924   }
3925 #endif
3926 
3927   for (i=0; i<m && Ii; i++) {
3928     nnz     = Ii[i+1]- Ii[i];
3929     JJ      = J + Ii[i];
3930     nnz_max = PetscMax(nnz_max,nnz);
3931     d       = 0;
3932     for (j=0; j<nnz; j++) {
3933       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3934     }
3935     d_nnz[i] = d;
3936     o_nnz[i] = nnz - d;
3937   }
3938   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3939   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3940 
3941   if (v) values = (PetscScalar*)v;
3942   else {
3943     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3944   }
3945 
3946   for (i=0; i<m && Ii; i++) {
3947     ii   = i + rstart;
3948     nnz  = Ii[i+1]- Ii[i];
3949     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3950   }
3951   nooffprocentries    = B->nooffprocentries;
3952   B->nooffprocentries = PETSC_TRUE;
3953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3955   B->nooffprocentries = nooffprocentries;
3956 
3957   if (!v) {
3958     ierr = PetscFree(values);CHKERRQ(ierr);
3959   }
3960   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963 
3964 /*@
3965    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3966    (the default parallel PETSc format).
3967 
3968    Collective on MPI_Comm
3969 
3970    Input Parameters:
3971 +  B - the matrix
3972 .  i - the indices into j for the start of each local row (starts with zero)
3973 .  j - the column indices for each local row (starts with zero)
3974 -  v - optional values in the matrix
3975 
3976    Level: developer
3977 
3978    Notes:
3979        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3980      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3981      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3982 
3983        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3984 
3985        The format used for the sparse matrix input is equivalent to a
3986     row-major ordering, i.e., for the following matrix, the expected input data is
3987     as shown:
3988 
3989 $        1 0 0
3990 $        2 0 3     P0
3991 $       -------
3992 $        4 5 6     P1
3993 $
3994 $     Process0 [P0]: rows_owned=[0,1]
3995 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3996 $        j =  {0,0,2}  [size = 3]
3997 $        v =  {1,2,3}  [size = 3]
3998 $
3999 $     Process1 [P1]: rows_owned=[2]
4000 $        i =  {0,3}    [size = nrow+1  = 1+1]
4001 $        j =  {0,1,2}  [size = 3]
4002 $        v =  {4,5,6}  [size = 3]
4003 
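   A minimal calling sketch under the same assumptions (each rank passes its
   own local i, j, v triplet; mlocal and nlocal are illustrative names for the
   local row and column sizes):

.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
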
4004 .keywords: matrix, aij, compressed row, sparse, parallel
4005 
4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4007           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4008 @*/
4009 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4010 {
4011   PetscErrorCode ierr;
4012 
4013   PetscFunctionBegin;
4014   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 /*@C
4019    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4020    (the default parallel PETSc format).  For good matrix assembly performance
4021    the user should preallocate the matrix storage by setting the parameters
4022    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4023    performance can be increased by more than a factor of 50.
4024 
4025    Collective on MPI_Comm
4026 
4027    Input Parameters:
4028 +  B - the matrix
4029 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4030            (same value is used for all local rows)
4031 .  d_nnz - array containing the number of nonzeros in the various rows of the
4032            DIAGONAL portion of the local submatrix (possibly different for each row)
4033            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4034            The size of this array is equal to the number of local rows, i.e 'm'.
4035            For matrices that will be factored, you must leave room for (and set)
4036            the diagonal entry even if it is zero.
4037 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4038            submatrix (same value is used for all local rows).
4039 -  o_nnz - array containing the number of nonzeros in the various rows of the
4040            OFF-DIAGONAL portion of the local submatrix (possibly different for
4041            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4042            structure. The size of this array is equal to the number
4043            of local rows, i.e 'm'.
4044 
4045    If the *_nnz parameter is given then the *_nz parameter is ignored
4046 
4047    The AIJ format (also called the Yale sparse matrix format or
4048    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4049    storage.  The stored row and column indices begin with zero.
4050    See Users-Manual: ch_mat for details.
4051 
4052    The parallel matrix is partitioned such that the first m0 rows belong to
4053    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4054    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
4055 
4056    The DIAGONAL portion of the local submatrix of a processor can be defined
4057    as the submatrix which is obtained by extracting the part corresponding to
4058    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4059    first row that belongs to the processor, r2 is the last row belonging to
4060    this processor, and c1-c2 is the range of indices of the local part of a
4061    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4062    common case of a square matrix, the row and column ranges are the same and
4063    the DIAGONAL part is also square. The remaining portion of the local
4064    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4065 
4066    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4067 
4068    You can call MatGetInfo() to get information on how effective the preallocation was;
4069    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4070    You can also run with the option -info and look for messages with the string
4071    malloc in them to see if additional memory allocation was needed.
4072 
4073    Example usage:
4074 
4075    Consider the following 8x8 matrix with 34 non-zero values, that is
4076    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4077    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4078    as follows:
4079 
4080 .vb
4081             1  2  0  |  0  3  0  |  0  4
4082     Proc0   0  5  6  |  7  0  0  |  8  0
4083             9  0 10  | 11  0  0  | 12  0
4084     -------------------------------------
4085            13  0 14  | 15 16 17  |  0  0
4086     Proc1   0 18  0  | 19 20 21  |  0  0
4087             0  0  0  | 22 23  0  | 24  0
4088     -------------------------------------
4089     Proc2  25 26 27  |  0  0 28  | 29  0
4090            30  0  0  | 31 32 33  |  0 34
4091 .ve
4092 
4093    This can be represented as a collection of submatrices as:
4094 
4095 .vb
4096       A B C
4097       D E F
4098       G H I
4099 .ve
4100 
4101    The submatrices A,B,C are owned by proc0, D,E,F are
4102    owned by proc1, and G,H,I are owned by proc2.
4103 
4104    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4106    The 'M','N' parameters are 8,8, and have the same values on all procs.
4107 
4108    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4109    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4110    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4111    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4112    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4113    matrix, and [DF] as another SeqAIJ matrix.
4114 
4115    When d_nz, o_nz parameters are specified, d_nz storage elements are
4116    allocated for every row of the local diagonal submatrix, and o_nz
4117    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4118    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4119    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4120    In this case, the values of d_nz,o_nz are:
4121 .vb
4122      proc0 : dnz = 2, o_nz = 2
4123      proc1 : dnz = 3, o_nz = 2
4124      proc2 : dnz = 1, o_nz = 4
4125 .ve
4126    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4127    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4128    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4129    34 values.
4130 
4131    When d_nnz, o_nnz parameters are specified, the storage is specified
4132    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4133    In the above case the values for d_nnz,o_nnz are:
4134 .vb
4135      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4136      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4137      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4138 .ve
4139    Here the space allocated is the sum of all the above values, i.e., 34, and
4140    hence pre-allocation is perfect.
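
   As a rough sketch (not part of this interface), exact d_nnz/o_nnz values can
   be counted against the column ownership range before assembly; cstart, cend,
   ncols, and cols below are placeholder names for data the caller already has:

.vb
      /* cstart,cend: this process's column ownership range (assumed known) */
      for (i=0; i<m; i++) {
        d_nnz[i] = 0; o_nnz[i] = 0;
        for (j=0; j<ncols[i]; j++) {  /* cols[i][]: global column indices of local row i */
          if (cols[i][j] >= cstart && cols[i][j] < cend) d_nnz[i]++;
          else o_nnz[i]++;
        }
      }
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve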
4141 
4142    Level: intermediate
4143 
4144 .keywords: matrix, aij, compressed row, sparse, parallel
4145 
4146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4147           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4148 @*/
4149 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4150 {
4151   PetscErrorCode ierr;
4152 
4153   PetscFunctionBegin;
4154   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4155   PetscValidType(B,1);
4156   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 /*@
4161      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4162          CSR format the local rows.
4163 
4164    Collective on MPI_Comm
4165 
4166    Input Parameters:
4167 +  comm - MPI communicator
4168 .  m - number of local rows (Cannot be PETSC_DECIDE)
4169 .  n - This value should be the same as the local size used in creating the
4170        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4171        calculated if N is given). For square matrices n is almost always m.
4172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4174 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4175 .   j - column indices
4176 -   a - matrix values
4177 
4178    Output Parameter:
4179 .   mat - the matrix
4180 
4181    Level: intermediate
4182 
4183    Notes:
4184        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4185      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4186      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4187 
4188        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4189 
4190        The format used for the sparse matrix input is equivalent to a
4191     row-major ordering, i.e., for the following matrix, the expected input data is
4192     as shown:
4193 
4194 $        1 0 0
4195 $        2 0 3     P0
4196 $       -------
4197 $        4 5 6     P1
4198 $
4199 $     Process0 [P0]: rows_owned=[0,1]
4200 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4201 $        j =  {0,0,2}  [size = 3]
4202 $        v =  {1,2,3}  [size = 3]
4203 $
4204 $     Process1 [P1]: rows_owned=[2]
4205 $        i =  {0,3}    [size = nrow+1  = 1+1]
4206 $        j =  {0,1,2}  [size = 3]
4207 $        v =  {4,5,6}  [size = 3]
4208 
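   As a sketch, process 0 of the example above could create the matrix with
   the call below (each rank supplies its own local rows; the values mirror the
   layout shown):

.vb
      PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
      PetscScalar v[] = {1,2,3};
      Mat         A;
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
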
4209 .keywords: matrix, aij, compressed row, sparse, parallel
4210 
4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4212           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4213 @*/
4214 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4215 {
4216   PetscErrorCode ierr;
4217 
4218   PetscFunctionBegin;
4219   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4220   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4221   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4222   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4223   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4224   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4225   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4226   PetscFunctionReturn(0);
4227 }
4228 
4229 /*@C
4230    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4231    (the default parallel PETSc format).  For good matrix assembly performance
4232    the user should preallocate the matrix storage by setting the parameters
4233    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4234    performance can be increased by more than a factor of 50.
4235 
4236    Collective on MPI_Comm
4237 
4238    Input Parameters:
4239 +  comm - MPI communicator
4240 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4241            This value should be the same as the local size used in creating the
4242            y vector for the matrix-vector product y = Ax.
4243 .  n - This value should be the same as the local size used in creating the
4244        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4245        calculated if N is given). For square matrices n is almost always m.
4246 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4247 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4248 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4249            (same value is used for all local rows)
4250 .  d_nnz - array containing the number of nonzeros in the various rows of the
4251            DIAGONAL portion of the local submatrix (possibly different for each row)
4252            or NULL, if d_nz is used to specify the nonzero structure.
4253            The size of this array is equal to the number of local rows, i.e 'm'.
4254 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4255            submatrix (same value is used for all local rows).
4256 -  o_nnz - array containing the number of nonzeros in the various rows of the
4257            OFF-DIAGONAL portion of the local submatrix (possibly different for
4258            each row) or NULL, if o_nz is used to specify the nonzero
4259            structure. The size of this array is equal to the number
4260            of local rows, i.e 'm'.
4261 
4262    Output Parameter:
4263 .  A - the matrix
4264 
4265    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4266    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4267    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4268 
4269    Notes:
4270    If the *_nnz parameter is given then the *_nz parameter is ignored
4271 
4272    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4273    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4274    storage requirements for this matrix.
4275 
4276    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4277    processor then it must be used on all processors that share the object for
4278    that argument.
4279 
4280    The user MUST specify either the local or global matrix dimensions
4281    (possibly both).
4282 
4283    The parallel matrix is partitioned across processors such that the
4284    first m0 rows belong to process 0, the next m1 rows belong to
4285    process 1, the next m2 rows belong to process 2 etc.. where
4286    process 1, the next m2 rows belong to process 2 etc., where
4287    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4288    values corresponding to an [m x N] submatrix.
4289    The columns are logically partitioned with the n0 columns belonging
4290    to the 0th partition, the next n1 columns belonging to the next
4291    partition etc., where n0,n1,n2,... are the input parameter 'n'.
4292 
4293    The DIAGONAL portion of the local submatrix on any given processor
4294    is the submatrix corresponding to the rows and columns m,n
4295    owned by the given processor, i.e., the diagonal matrix on
4296    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4297    etc. The remaining portion of the local submatrix [m x (N-n)]
4298    constitute the OFF-DIAGONAL portion. The example below better
4299    illustrates this concept.
4300 
4301    For a square global matrix we define each processor's diagonal portion
4302    to be its local rows and the corresponding columns (a square submatrix);
4303    each processor's off-diagonal portion encompasses the remainder of the
4304    local matrix (a rectangular submatrix).
4305 
4306    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4307 
4308    When calling this routine with a single process communicator, a matrix of
4309    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4310    type of communicator, use the construction mechanism
4315 $     MatCreate(...,&A);
4316 $     MatSetType(A,MATMPIAIJ);
4317 $     MatSetSizes(A, m,n,M,N);
4318 $     MatMPIAIJSetPreallocation(A,...);
4319 
4320    By default, this format uses inodes (identical nodes) when possible.
4321    We search for consecutive rows with the same nonzero structure, thereby
4322    reusing matrix information to achieve increased efficiency.
4323 
4324    Options Database Keys:
4325 +  -mat_no_inode  - Do not use inodes
4326 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4327 
4330    Example usage:
4331 
4332    Consider the following 8x8 matrix with 34 non-zero values, that is
4333    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4334    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4335    as follows
4336 
4337 .vb
4338             1  2  0  |  0  3  0  |  0  4
4339     Proc0   0  5  6  |  7  0  0  |  8  0
4340             9  0 10  | 11  0  0  | 12  0
4341     -------------------------------------
4342            13  0 14  | 15 16 17  |  0  0
4343     Proc1   0 18  0  | 19 20 21  |  0  0
4344             0  0  0  | 22 23  0  | 24  0
4345     -------------------------------------
4346     Proc2  25 26 27  |  0  0 28  | 29  0
4347            30  0  0  | 31 32 33  |  0 34
4348 .ve
4349 
4350    This can be represented as a collection of submatrices as
4351 
4352 .vb
4353       A B C
4354       D E F
4355       G H I
4356 .ve
4357 
4358    The submatrices A,B,C are owned by proc0, D,E,F are
4359    owned by proc1, and G,H,I are owned by proc2.
4360 
4361    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4362    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4363    The 'M','N' parameters are 8,8, and have the same values on all procs.
4364 
4365    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4366    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4367    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4368    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4369    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4370    matrix, and [DF] as another SeqAIJ matrix.
4371 
4372    When d_nz, o_nz parameters are specified, d_nz storage elements are
4373    allocated for every row of the local diagonal submatrix, and o_nz
4374    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4375    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4376    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4377    In this case, the values of d_nz,o_nz are
4378 .vb
4379      proc0 : dnz = 2, o_nz = 2
4380      proc1 : dnz = 3, o_nz = 2
4381      proc2 : dnz = 1, o_nz = 4
4382 .ve
4383    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4384    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4385    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4386    34 values.
4387 
4388    When d_nnz, o_nnz parameters are specified, the storage is specified
4389    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4390    In the above case the values for d_nnz,o_nnz are
4391 .vb
4392      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4393      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4394      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4395 .ve
4396    Here the space allocated is the sum of all the above values, i.e., 34, and
4397    hence pre-allocation is perfect.
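
   For instance, proc0 above (3 local rows and columns of the 8x8 matrix) could
   create the matrix with the sketch below; each process passes its own arrays:

.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      Mat      A;
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve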
4398 
4399    Level: intermediate
4400 
4401 .keywords: matrix, aij, compressed row, sparse, parallel
4402 
4403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4404           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4405 @*/
4406 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4407 {
4408   PetscErrorCode ierr;
4409   PetscMPIInt    size;
4410 
4411   PetscFunctionBegin;
4412   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4413   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415   if (size > 1) {
4416     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4417     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4418   } else {
4419     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4420     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4421   }
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4426 {
4427   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4428   PetscBool      flg;
4429   PetscErrorCode ierr;
4430 
4431   PetscFunctionBegin;
4432   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4433   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4434   if (Ad)     *Ad     = a->A;
4435   if (Ao)     *Ao     = a->B;
4436   if (colmap) *colmap = a->garray;
4437   PetscFunctionReturn(0);
4438 }
4439 
4440 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4441 {
4442   PetscErrorCode ierr;
4443   PetscInt       m,N,i,rstart,nnz,Ii;
4444   PetscInt       *indx;
4445   PetscScalar    *values;
4446 
4447   PetscFunctionBegin;
4448   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4449   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4450     PetscInt       *dnz,*onz,sum,bs,cbs;
4451 
4452     if (n == PETSC_DECIDE) {
4453       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4454     }
4455     /* Check sum(n) = N */
4456     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4457     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4458 
4459     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4460     rstart -= m;
4461 
4462     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4463     for (i=0; i<m; i++) {
4464       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4465       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4466       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4467     }
4468 
4469     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4470     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4471     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4472     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4473     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4474     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4475     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4476     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4477   }
4478 
4479   /* numeric phase */
4480   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4481   for (i=0; i<m; i++) {
4482     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4483     Ii   = i + rstart;
4484     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4485     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4486   }
4487   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4488   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4489   PetscFunctionReturn(0);
4490 }
4491 
4492 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4493 {
4494   PetscErrorCode    ierr;
4495   PetscMPIInt       rank;
4496   PetscInt          m,N,i,rstart,nnz;
4497   size_t            len;
4498   const PetscInt    *indx;
4499   PetscViewer       out;
4500   char              *name;
4501   Mat               B;
4502   const PetscScalar *values;
4503 
4504   PetscFunctionBegin;
4505   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4506   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4507   /* Should this be the type of the diagonal block of A? */
4508   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4509   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4510   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4511   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4512   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4513   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4514   for (i=0; i<m; i++) {
4515     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4517     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4518   }
4519   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4520   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4521 
4522   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4523   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4525   sprintf(name,"%s.%d",outfile,rank);
4526   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4527   ierr = PetscFree(name);CHKERRQ(ierr);
4528   ierr = MatView(B,out);CHKERRQ(ierr);
4529   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4530   ierr = MatDestroy(&B);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4535 {
4536   PetscErrorCode      ierr;
4537   Mat_Merge_SeqsToMPI *merge;
4538   PetscContainer      container;
4539 
4540   PetscFunctionBegin;
4541   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4542   if (container) {
4543     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4556     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4557     ierr = PetscFree(merge);CHKERRQ(ierr);
4558     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4559   }
4560   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 #include <../src/mat/utils/freespace.h>
4565 #include <petscbt.h>
4566 
4567 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4568 {
4569   PetscErrorCode      ierr;
4570   MPI_Comm            comm;
4571   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4572   PetscMPIInt         size,rank,taga,*len_s;
4573   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4574   PetscInt            proc,m;
4575   PetscInt            **buf_ri,**buf_rj;
4576   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4577   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4578   MPI_Request         *s_waits,*r_waits;
4579   MPI_Status          *status;
4580   MatScalar           *aa=a->a;
4581   MatScalar           **abuf_r,*ba_i;
4582   Mat_Merge_SeqsToMPI *merge;
4583   PetscContainer      container;
4584 
4585   PetscFunctionBegin;
4586   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4587   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4588 
4589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4590   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4591 
4592   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4593   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4594 
4595   bi     = merge->bi;
4596   bj     = merge->bj;
4597   buf_ri = merge->buf_ri;
4598   buf_rj = merge->buf_rj;
4599 
4600   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4601   owners = merge->rowmap->range;
4602   len_s  = merge->len_s;
4603 
4604   /* send and recv matrix values */
4605   /*-----------------------------*/
4606   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4607   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4608 
4609   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4610   for (proc=0,k=0; proc<size; proc++) {
4611     if (!len_s[proc]) continue;
4612     i    = owners[proc];
4613     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4614     k++;
4615   }
4616 
4617   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4618   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4619   ierr = PetscFree(status);CHKERRQ(ierr);
4620 
4621   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4622   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4623 
4624   /* insert mat values of mpimat */
4625   /*----------------------------*/
4626   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4627   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4628 
4629   for (k=0; k<merge->nrecv; k++) {
4630     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4631     nrows       = *(buf_ri_k[k]);
4632     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4633     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4634   }
4635 
4636   /* set values of ba */
4637   m = merge->rowmap->n;
4638   for (i=0; i<m; i++) {
4639     arow = owners[rank] + i;
4640     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4641     bnzi = bi[i+1] - bi[i];
4642     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4643 
4644     /* add local non-zero vals of this proc's seqmat into ba */
4645     anzi   = ai[arow+1] - ai[arow];
4646     aj     = a->j + ai[arow];
4647     aa     = a->a + ai[arow];
4648     nextaj = 0;
4649     for (j=0; nextaj<anzi; j++) {
4650       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4651         ba_i[j] += aa[nextaj++];
4652       }
4653     }
4654 
4655     /* add received vals into ba */
4656     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4657       /* i-th row */
4658       if (i == *nextrow[k]) {
4659         anzi   = *(nextai[k]+1) - *nextai[k];
4660         aj     = buf_rj[k] + *(nextai[k]);
4661         aa     = abuf_r[k] + *(nextai[k]);
4662         nextaj = 0;
4663         for (j=0; nextaj<anzi; j++) {
4664           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4665             ba_i[j] += aa[nextaj++];
4666           }
4667         }
4668         nextrow[k]++; nextai[k]++;
4669       }
4670     }
4671     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4672   }
4673   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4674   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675 
4676   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4677   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4678   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4679   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4680   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4681   PetscFunctionReturn(0);
4682 }
4683 
4684 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4685 {
4686   PetscErrorCode      ierr;
4687   Mat                 B_mpi;
4688   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4689   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4690   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4691   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4692   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4693   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4694   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4695   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4696   MPI_Status          *status;
4697   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4698   PetscBT             lnkbt;
4699   Mat_Merge_SeqsToMPI *merge;
4700   PetscContainer      container;
4701 
4702   PetscFunctionBegin;
4703   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4704 
4705   /* make sure it is a PETSc comm */
4706   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4707   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4709 
4710   ierr = PetscNew(&merge);CHKERRQ(ierr);
4711   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4712 
4713   /* determine row ownership */
4714   /*---------------------------------------------------------*/
4715   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4717   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4718   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4719   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4720   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4721   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4722 
4723   m      = merge->rowmap->n;
4724   owners = merge->rowmap->range;
4725 
4726   /* determine the number of messages to send, their lengths */
4727   /*---------------------------------------------------------*/
4728   len_s = merge->len_s;
4729 
4730   len          = 0; /* length of buf_si[] */
4731   merge->nsend = 0;
4732   for (proc=0; proc<size; proc++) {
4733     len_si[proc] = 0;
4734     if (proc == rank) {
4735       len_s[proc] = 0;
4736     } else {
4737       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4738       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4739     }
4740     if (len_s[proc]) {
4741       merge->nsend++;
4742       nrows = 0;
4743       for (i=owners[proc]; i<owners[proc+1]; i++) {
4744         if (ai[i+1] > ai[i]) nrows++;
4745       }
4746       len_si[proc] = 2*(nrows+1);
4747       len         += len_si[proc];
4748     }
4749   }
4750 
4751   /* determine the number and length of messages to receive for ij-structure */
4752   /*-------------------------------------------------------------------------*/
4753   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4754   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4755 
4756   /* post the Irecv of j-structure */
4757   /*-------------------------------*/
4758   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4759   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4760 
4761   /* post the Isend of j-structure */
4762   /*--------------------------------*/
4763   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4764 
4765   for (proc=0, k=0; proc<size; proc++) {
4766     if (!len_s[proc]) continue;
4767     i    = owners[proc];
4768     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4769     k++;
4770   }
4771 
4772   /* receives and sends of j-structure are complete */
4773   /*------------------------------------------------*/
4774   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4775   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4776 
4777   /* send and recv i-structure */
4778   /*---------------------------*/
4779   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4780   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4781 
4782   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4783   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4784   for (proc=0,k=0; proc<size; proc++) {
4785     if (!len_s[proc]) continue;
4786     /* form outgoing message for i-structure:
4787          buf_si[0]:                 nrows to be sent
4788                [1:nrows]:           row index (global)
4789                [nrows+1:2*nrows+1]: i-structure index
4790     */
4791     /*-------------------------------------------*/
4792     nrows       = len_si[proc]/2 - 1;
4793     buf_si_i    = buf_si + nrows+1;
4794     buf_si[0]   = nrows;
4795     buf_si_i[0] = 0;
4796     nrows       = 0;
4797     for (i=owners[proc]; i<owners[proc+1]; i++) {
4798       anzi = ai[i+1] - ai[i];
4799       if (anzi) {
4800         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4801         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4802         nrows++;
4803       }
4804     }
4805     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4806     k++;
4807     buf_si += len_si[proc];
4808   }
4809 
4810   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4811   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4812 
4813   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4814   for (i=0; i<merge->nrecv; i++) {
4815     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4816   }
4817 
4818   ierr = PetscFree(len_si);CHKERRQ(ierr);
4819   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4820   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4821   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4822   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4823   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4824   ierr = PetscFree(status);CHKERRQ(ierr);
4825 
4826   /* compute a local seq matrix in each processor */
4827   /*----------------------------------------------*/
4828   /* allocate bi array and free space for accumulating nonzero column info */
4829   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4830   bi[0] = 0;
4831 
4832   /* create and initialize a linked list */
4833   nlnk = N+1;
4834   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4835 
4836   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4837   len  = ai[owners[rank+1]] - ai[owners[rank]];
4838   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4839 
4840   current_space = free_space;
4841 
4842   /* determine symbolic info for each local row */
4843   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4844 
4845   for (k=0; k<merge->nrecv; k++) {
4846     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4847     nrows       = *buf_ri_k[k];
4848     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4849     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4850   }
4851 
4852   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4853   len  = 0;
4854   for (i=0; i<m; i++) {
4855     bnzi = 0;
4856     /* add local non-zero cols of this proc's seqmat into lnk */
4857     arow  = owners[rank] + i;
4858     anzi  = ai[arow+1] - ai[arow];
4859     aj    = a->j + ai[arow];
4860     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4861     bnzi += nlnk;
4862     /* add received col data into lnk */
4863     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4864       if (i == *nextrow[k]) { /* i-th row */
4865         anzi  = *(nextai[k]+1) - *nextai[k];
4866         aj    = buf_rj[k] + *nextai[k];
4867         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4868         bnzi += nlnk;
4869         nextrow[k]++; nextai[k]++;
4870       }
4871     }
4872     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4873 
4874     /* if free space is not available, make more free space */
4875     if (current_space->local_remaining<bnzi) {
4876       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4877       nspacedouble++;
4878     }
4879     /* copy data into free space, then initialize lnk */
4880     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4881     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4882 
4883     current_space->array           += bnzi;
4884     current_space->local_used      += bnzi;
4885     current_space->local_remaining -= bnzi;
4886 
4887     bi[i+1] = bi[i] + bnzi;
4888   }
4889 
4890   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4891 
4892   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4893   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4894   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4895 
4896   /* create symbolic parallel matrix B_mpi */
4897   /*---------------------------------------*/
4898   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4899   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4900   if (n==PETSC_DECIDE) {
4901     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4902   } else {
4903     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4904   }
4905   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4906   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4907   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4908   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4909   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4910 
4911   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4912   B_mpi->assembled    = PETSC_FALSE;
4913   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4914   merge->bi           = bi;
4915   merge->bj           = bj;
4916   merge->buf_ri       = buf_ri;
4917   merge->buf_rj       = buf_rj;
4918   merge->coi          = NULL;
4919   merge->coj          = NULL;
4920   merge->owners_co    = NULL;
4921 
4922   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4923 
4924   /* attach the supporting struct to B_mpi for reuse */
4925   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4926   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4927   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4928   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4929   *mpimat = B_mpi;
4930 
4931   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 /*@C
4936       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4937                  matrices from each processor
4938 
4939     Collective on MPI_Comm
4940 
4941    Input Parameters:
4942 +    comm - the communicator the parallel matrix will live on
4943 .    seqmat - the input sequential matrix
4944 .    m - number of local rows (or PETSC_DECIDE)
4945 .    n - number of local columns (or PETSC_DECIDE)
4946 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 
4948    Output Parameter:
4949 .    mpimat - the parallel matrix generated
4950 
4951     Level: advanced
4952 
4953    Notes:
4954      The dimensions of the sequential matrix in each processor MUST be the same.
4955      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4956      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
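
     A minimal usage sketch (each process contributes its own seqmat; the names
     are illustrative):

.vb
      Mat C;
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve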
4957 @*/
4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4959 {
4960   PetscErrorCode ierr;
4961   PetscMPIInt    size;
4962 
4963   PetscFunctionBegin;
4964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4965   if (size == 1) {
4966     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4967     if (scall == MAT_INITIAL_MATRIX) {
4968       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4969     } else {
4970       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4971     }
4972     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4973     PetscFunctionReturn(0);
4974   }
4975   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4976   if (scall == MAT_INITIAL_MATRIX) {
4977     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4978   }
4979   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4980   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981   PetscFunctionReturn(0);
4982 }
4983 
4984 /*@
4985      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4986           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4987           with MatGetSize().
4988 
4989     Not Collective
4990 
4991    Input Parameters:
4992 +    A - the matrix
4993 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4994 
4995    Output Parameter:
4996 .    A_loc - the local sequential matrix generated
4997 
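    A short usage sketch; MAT_REUSE_MATRIX refreshes the values of a previously
    generated A_loc after the parallel matrix has changed:

.vb
      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      /* ... change values in A ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
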
4998     Level: developer
4999 
5000 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5001 
5002 @*/
5003 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5004 {
5005   PetscErrorCode ierr;
5006   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5007   Mat_SeqAIJ     *mat,*a,*b;
5008   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5009   MatScalar      *aa,*ba,*cam;
5010   PetscScalar    *ca;
5011   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5012   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5013   PetscBool      match;
5014   MPI_Comm       comm;
5015   PetscMPIInt    size;
5016 
5017   PetscFunctionBegin;
5018   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5019   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5020   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5022   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5023 
5024   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5025   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5026   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5027   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5028   aa = a->a; ba = b->a;
5029   if (scall == MAT_INITIAL_MATRIX) {
5030     if (size == 1) {
5031       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5032       PetscFunctionReturn(0);
5033     }
5034 
5035     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5036     ci[0] = 0;
5037     for (i=0; i<am; i++) {
5038       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5039     }
5040     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5041     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5042     k    = 0;
5043     for (i=0; i<am; i++) {
5044       ncols_o = bi[i+1] - bi[i];
5045       ncols_d = ai[i+1] - ai[i];
5046       /* off-diagonal portion of A, global columns left of the diagonal block */
5047       for (jo=0; jo<ncols_o; jo++) {
5048         col = cmap[*bj];
5049         if (col >= cstart) break;
5050         cj[k]   = col; bj++;
5051         ca[k++] = *ba++;
5052       }
5053       /* diagonal portion of A */
5054       for (j=0; j<ncols_d; j++) {
5055         cj[k]   = cstart + *aj++;
5056         ca[k++] = *aa++;
5057       }
5058       /* off-diagonal portion of A, global columns right of the diagonal block */
5059       for (j=jo; j<ncols_o; j++) {
5060         cj[k]   = cmap[*bj++];
5061         ca[k++] = *ba++;
5062       }
5063     }
5064     /* put together the new matrix */
5065     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5066     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5067     /* Since these are PETSc arrays, change flags to free them as necessary. */
5068     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5069     mat->free_a  = PETSC_TRUE;
5070     mat->free_ij = PETSC_TRUE;
5071     mat->nonew   = 0;
5072   } else if (scall == MAT_REUSE_MATRIX) {
5073     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5074     ci = mat->i; cj = mat->j; cam = mat->a;
5075     for (i=0; i<am; i++) {
5076       /* off-diagonal portion of A, global columns left of the diagonal block */
5077       ncols_o = bi[i+1] - bi[i];
5078       for (jo=0; jo<ncols_o; jo++) {
5079         col = cmap[*bj];
5080         if (col >= cstart) break;
5081         *cam++ = *ba++; bj++;
5082       }
5083       /* diagonal portion of A */
5084       ncols_d = ai[i+1] - ai[i];
5085       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5086       /* off-diagonal portion of A, global columns right of the diagonal block */
5087       for (j=jo; j<ncols_o; j++) {
5088         *cam++ = *ba++; bj++;
5089       }
5090     }
5091   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5092   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
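
/*
   Example usage of MatMPIAIJGetLocalMat() (a minimal sketch; assumes A is an
   assembled MATMPIAIJ matrix):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc, a sequential matrix holding the local rows of A ...
     ... after the numerical values of A change (same nonzero pattern) ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/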
5095 
5096 /*@C
5097      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and only its NON-ZERO columns
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    row, col - index sets of rows and columns to extract (or NULL)
5105 
5106    Output Parameter:
5107 .    A_loc - the local sequential matrix generated
5108 
5109     Level: developer
5110 
5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5112 
5113 @*/
5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5115 {
5116   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5117   PetscErrorCode ierr;
5118   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5119   IS             isrowa,iscola;
5120   Mat            *aloc;
5121   PetscBool      match;
5122 
5123   PetscFunctionBegin;
5124   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5125   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5126   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5127   if (!row) {
5128     start = A->rmap->rstart; end = A->rmap->rend;
5129     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5130   } else {
5131     isrowa = *row;
5132   }
5133   if (!col) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5148   } else {
5149     iscola = *col;
5150   }
5151   if (scall != MAT_INITIAL_MATRIX) {
5152     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5153     aloc[0] = *A_loc;
5154   }
5155   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5156   if (!col) { /* attach global id of condensed columns */
5157     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5158   }
5159   *A_loc = aloc[0];
5160   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5161   if (!row) {
5162     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5163   }
5164   if (!col) {
5165     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5166   }
5167   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5168   PetscFunctionReturn(0);
5169 }
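
/*
   Example usage of MatMPIAIJGetLocalMatCondensed() (a minimal sketch; passing
   NULL for row and col so that all local rows and the nonzero columns are
   selected automatically):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... A_loc contains only the columns in which the local part of A has nonzeros ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/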
5170 
5171 /*@C
5172     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5173 
5174     Collective on Mat
5175 
5176    Input Parameters:
5177 +    A,B - the matrices in mpiaij format
5178 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5179 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5180 
5181    Output Parameter:
5182 +    rowb, colb - index sets of rows and columns of B to extract
5183 -    B_seq - the sequential matrix generated
5184 
5185     Level: developer
5186 
5187 @*/
5188 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5189 {
5190   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5191   PetscErrorCode ierr;
5192   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5193   IS             isrowb,iscolb;
5194   Mat            *bseq=NULL;
5195 
5196   PetscFunctionBegin;
5197   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5198     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5199   }
5200   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5201 
5202   if (scall == MAT_INITIAL_MATRIX) {
5203     start = A->cmap->rstart;
5204     cmap  = a->garray;
5205     nzA   = a->A->cmap->n;
5206     nzB   = a->B->cmap->n;
5207     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5208     ncols = 0;
5209     for (i=0; i<nzB; i++) {  /* row < local row index */
5210       if (cmap[i] < start) idx[ncols++] = cmap[i];
5211       else break;
5212     }
5213     imark = i;
5214     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5215     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5216     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5217     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5218   } else {
5219     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5220     isrowb  = *rowb; iscolb = *colb;
5221     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5222     bseq[0] = *B_seq;
5223   }
5224   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5225   *B_seq = bseq[0];
5226   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5227   if (!rowb) {
5228     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5229   } else {
5230     *rowb = isrowb;
5231   }
5232   if (!colb) {
5233     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5234   } else {
5235     *colb = iscolb;
5236   }
5237   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5238   PetscFunctionReturn(0);
5239 }
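
/*
   Example usage of MatGetBrowsOfAcols() (a minimal sketch; assumes A and B are
   MATMPIAIJ matrices whose layouts satisfy the compatibility check above):

     Mat B_seq;
     IS  rowb = NULL,colb = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... rowb and colb are returned so they can be handed back below ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/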
5240 
5241 /*
5242     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5243     of the OFF-DIAGONAL portion of the local part of A
5244 
5245     Collective on Mat
5246 
5247    Input Parameters:
5248 +    A,B - the matrices in mpiaij format
5249 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5250 
5251    Output Parameter:
5252 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5253 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5254 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5255 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5256 
5257     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5258      for this matrix. This is not desirable.
5259 
5260     Level: developer
5261 
5262 */
5263 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5264 {
5265   PetscErrorCode         ierr;
5266   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5267   Mat_SeqAIJ             *b_oth;
5268   VecScatter             ctx;
5269   MPI_Comm               comm;
5270   const PetscMPIInt      *rprocs,*sprocs;
5271   const PetscInt         *srow,*rstarts,*sstarts;
5272   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5273   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5274   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5275   MPI_Request            *rwaits = NULL,*swaits = NULL;
5276   MPI_Status             rstatus;
5277   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5278 
5279   PetscFunctionBegin;
5280   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5282 
5283   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5284     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5285   }
5286   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5287   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5288 
5289   if (size == 1) {
5290     if (startsj_s) *startsj_s = NULL; /* NULL the caller's outputs; assigning the local parameter copies would have no effect */
         if (startsj_r) *startsj_r = NULL;
5291     if (bufa_ptr)  *bufa_ptr  = NULL;
5292     *B_oth    = NULL;
         ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); /* close the log event begun above before returning early */
5293     PetscFunctionReturn(0);
5294   }
5295 
5296   ctx = a->Mvctx;
5297   tag = ((PetscObject)ctx)->tag;
5298 
5299   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5300   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5301   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5302   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5303   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5304   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5305   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5306 
5307   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5308   if (scall == MAT_INITIAL_MATRIX) {
5309     /* i-array */
5310     /*---------*/
5311     /*  post receives */
5312     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5313     for (i=0; i<nrecvs; i++) {
5314       rowlen = rvalues + rstarts[i]*rbs;
5315       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5316       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5317     }
5318 
5319     /* pack the outgoing message */
5320     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5321 
5322     sstartsj[0] = 0;
5323     rstartsj[0] = 0;
5324     len         = 0; /* total length of j or a array to be sent */
5325     if (nsends) {
5326       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5327       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5328     }
5329     for (i=0; i<nsends; i++) {
5330       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5331       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5332       for (j=0; j<nrows; j++) {
5333         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5334         for (l=0; l<sbs; l++) {
5335           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5336 
5337           rowlen[j*sbs+l] = ncols;
5338 
5339           len += ncols;
5340           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5341         }
5342         k++;
5343       }
5344       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5345 
5346       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5347     }
5348     /* recvs and sends of i-array are completed */
5349     i = nrecvs;
5350     while (i--) {
5351       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5352     }
5353     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5354     ierr = PetscFree(svalues);CHKERRQ(ierr);
5355 
5356     /* allocate buffers for sending j and a arrays */
5357     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5358     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5359 
5360     /* create i-array of B_oth */
5361     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5362 
5363     b_othi[0] = 0;
5364     len       = 0; /* total length of j or a array to be received */
5365     k         = 0;
5366     for (i=0; i<nrecvs; i++) {
5367       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5368       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5369       for (j=0; j<nrows; j++) {
5370         b_othi[k+1] = b_othi[k] + rowlen[j];
5371         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5372         k++;
5373       }
5374       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5375     }
5376     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5377 
5378     /* allocate space for j and a arrays of B_oth */
5379     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5380     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5381 
5382     /* j-array */
5383     /*---------*/
5384     /*  post receives of j-array */
5385     for (i=0; i<nrecvs; i++) {
5386       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5387       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5388     }
5389 
5390     /* pack the outgoing message j-array */
5391     if (nsends) k = sstarts[0];
5392     for (i=0; i<nsends; i++) {
5393       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5394       bufJ  = bufj+sstartsj[i];
5395       for (j=0; j<nrows; j++) {
5396         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5397         for (ll=0; ll<sbs; ll++) {
5398           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5399           for (l=0; l<ncols; l++) {
5400             *bufJ++ = cols[l];
5401           }
5402           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5403         }
5404       }
5405       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5406     }
5407 
5408     /* recvs and sends of j-array are completed */
5409     i = nrecvs;
5410     while (i--) {
5411       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5412     }
5413     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5414   } else if (scall == MAT_REUSE_MATRIX) {
5415     sstartsj = *startsj_s;
5416     rstartsj = *startsj_r;
5417     bufa     = *bufa_ptr;
5418     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5419     b_otha   = b_oth->a;
5420   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5421 
5422   /* a-array */
5423   /*---------*/
5424   /*  post receives of a-array */
5425   for (i=0; i<nrecvs; i++) {
5426     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5427     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5428   }
5429 
5430   /* pack the outgoing message a-array */
5431   if (nsends) k = sstarts[0];
5432   for (i=0; i<nsends; i++) {
5433     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5434     bufA  = bufa+sstartsj[i];
5435     for (j=0; j<nrows; j++) {
5436       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5437       for (ll=0; ll<sbs; ll++) {
5438         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5439         for (l=0; l<ncols; l++) {
5440           *bufA++ = vals[l];
5441         }
5442         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5443       }
5444     }
5445     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5446   }
5447   /* recvs and sends of a-array are completed */
5448   i = nrecvs;
5449   while (i--) {
5450     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5451   }
5452   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5453   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5454 
5455   if (scall == MAT_INITIAL_MATRIX) {
5456     /* put together the new matrix */
5457     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5458 
5459     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5460     /* Since these are PETSc arrays, change flags to free them as necessary. */
5461     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5462     b_oth->free_a  = PETSC_TRUE;
5463     b_oth->free_ij = PETSC_TRUE;
5464     b_oth->nonew   = 0;
5465 
5466     ierr = PetscFree(bufj);CHKERRQ(ierr);
5467     if (!startsj_s || !bufa_ptr) {
5468       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5469       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself; it is not returned to the caller in this case */
5470     } else {
5471       *startsj_s = sstartsj;
5472       *startsj_r = rstartsj;
5473       *bufa_ptr  = bufa;
5474     }
5475   }
5476 
5477   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5478   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5479   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5480   PetscFunctionReturn(0);
5481 }
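
/*
   Example of the intended call pattern for MatGetBrowsOfAoCols_MPIAIJ() (a
   minimal sketch; the startsj and bufa buffers returned by the first call are
   handed back for the MAT_REUSE_MATRIX call and freed by the caller):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... after the numerical values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/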
5482 
5483 /*@C
5484   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5485 
5486   Not Collective
5487 
5488   Input Parameter:
5489 . A - The matrix in mpiaij format
5490 
5491   Output Parameters:
5492 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5493 . colmap - A map from global column index to local index into lvec
5494 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5495 
5496   Level: developer
5497 
5498 @*/
5499 #if defined(PETSC_USE_CTABLE)
5500 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5501 #else
5502 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5503 #endif
5504 {
5505   Mat_MPIAIJ *a;
5506 
5507   PetscFunctionBegin;
5508   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5509   PetscValidPointer(lvec, 2);
5510   PetscValidPointer(colmap, 3);
5511   PetscValidPointer(multScatter, 4);
5512   a = (Mat_MPIAIJ*) A->data;
5513   if (lvec) *lvec = a->lvec;
5514   if (colmap) *colmap = a->colmap;
5515   if (multScatter) *multScatter = a->Mvctx;
5516   PetscFunctionReturn(0);
5517 }
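
/*
   Example usage of MatGetCommunicationStructs() (a minimal sketch; A is a
   MATMPIAIJ matrix, and the returned objects are owned by A, so the caller
   must not destroy them):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/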
5518 
5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5520 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5521 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5522 #if defined(PETSC_HAVE_MKL_SPARSE)
5523 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5524 #endif
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5526 #if defined(PETSC_HAVE_ELEMENTAL)
5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5528 #endif
5529 #if defined(PETSC_HAVE_HYPRE)
5530 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5531 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5532 #endif
5533 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5535 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5536 
5537 /*
5538     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5539 
5540                n                       p                          p
5541         (              )       (              )         (                  )
5542       m (      A       )  *  n (       B      )   =   m (         C        )
5543         (              )       (              )         (                  )
5544 
5545 */
5546 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5547 {
5548   PetscErrorCode ierr;
5549   Mat            At,Bt,Ct;
5550 
5551   PetscFunctionBegin;
5552   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5553   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5554   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5555   ierr = MatDestroy(&At);CHKERRQ(ierr);
5556   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5557   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5558   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5563 {
5564   PetscErrorCode ierr;
5565   PetscInt       m=A->rmap->n,n=B->cmap->n;
5566   Mat            Cmat;
5567 
5568   PetscFunctionBegin;
5569   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5570   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5571   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5572   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5573   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5574   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5575   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5576   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5577 
5578   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5579 
5580   *C = Cmat;
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /* ----------------------------------------------------------------*/
5585 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5586 {
5587   PetscErrorCode ierr;
5588 
5589   PetscFunctionBegin;
5590   if (scall == MAT_INITIAL_MATRIX) {
5591     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5592     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5593     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5594   }
5595   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5596   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5597   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 /*MC
5602    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5603 
5604    Options Database Keys:
5605 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5606 
5607   Level: beginner
5608 
5609 .seealso: MatCreateAIJ()
5610 M*/
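
/*
   Example usage (a minimal sketch; M and N are global sizes chosen by the
   caller, and the preallocation numbers are illustrative only; the type can
   equivalently be selected at run time with -mat_type mpiaij via
   MatSetFromOptions()):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/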
5611 
5612 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5613 {
5614   Mat_MPIAIJ     *b;
5615   PetscErrorCode ierr;
5616   PetscMPIInt    size;
5617 
5618   PetscFunctionBegin;
5619   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5620 
5621   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5622   B->data       = (void*)b;
5623   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5624   B->assembled  = PETSC_FALSE;
5625   B->insertmode = NOT_SET_VALUES;
5626   b->size       = size;
5627 
5628   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5629 
5630   /* build cache for off array entries formed */
5631   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5632 
5633   b->donotstash  = PETSC_FALSE;
5634   b->colmap      = NULL;
5635   b->garray      = NULL;
5636   b->roworiented = PETSC_TRUE;
5637 
5638   /* stuff used for matrix vector multiply */
5639   b->lvec  = NULL;
5640   b->Mvctx = NULL;
5641 
5642   /* stuff for MatGetRow() */
5643   b->rowindices   = NULL;
5644   b->rowvalues    = NULL;
5645   b->getrowactive = PETSC_FALSE;
5646 
5647   /* flexible pointer used in CUSP/CUSPARSE classes */
5648   b->spptr = NULL;
5649 
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5660 #if defined(PETSC_HAVE_MKL_SPARSE)
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5662 #endif
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5665 #if defined(PETSC_HAVE_ELEMENTAL)
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5667 #endif
5668 #if defined(PETSC_HAVE_HYPRE)
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5670 #endif
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5676 #if defined(PETSC_HAVE_HYPRE)
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5678 #endif
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5680   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5681   PetscFunctionReturn(0);
5682 }
5683 
5684 /*@C
5685      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5686          and "off-diagonal" part of the matrix in CSR format.
5687 
5688    Collective on MPI_Comm
5689 
5690    Input Parameters:
5691 +  comm - MPI communicator
5692 .  m - number of local rows (Cannot be PETSC_DECIDE)
5693 .  n - This value should be the same as the local size used in creating the
5694        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5695        it calculated if N is given). For square matrices n is almost always m.
5696 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5697 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5698 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5699 .   j - column indices
5700 .   a - matrix values
5701 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5702 .   oj - column indices
5703 -   oa - matrix values
5704 
5705    Output Parameter:
5706 .   mat - the matrix
5707 
5708    Level: advanced
5709 
5710    Notes:
5711        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5712        must free these arrays once the matrix has been destroyed, and not before.
5713 
5714        The i and j indices are 0-based
5715 
5716        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5717 
5718        This sets local rows and cannot be used to set off-processor values.
5719 
5720        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5721        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5722        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5723        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5724        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5725        communication if it is known that only local entries will be set.
5726 
5727 .keywords: matrix, aij, compressed row, sparse, parallel
5728 
5729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5730           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5731 @*/
5732 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5733 {
5734   PetscErrorCode ierr;
5735   Mat_MPIAIJ     *maij;
5736 
5737   PetscFunctionBegin;
5738   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5739   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5740   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5741   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5742   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5743   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5744   maij = (Mat_MPIAIJ*) (*mat)->data;
5745 
5746   (*mat)->preallocated = PETSC_TRUE;
5747 
5748   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5749   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5750 
5751   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5752   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5753 
5754   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5755   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5757   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5758 
5759   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5760   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5761   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5762   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5763   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5764   PetscFunctionReturn(0);
5765 }
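
/*
   Example usage of MatCreateMPIAIJWithSplitArrays() (a minimal sketch; each
   process owns one row and one column, with one entry in the diagonal block
   and one in the off-diagonal block; ogcol is a hypothetical global column
   index owned by another process, and the arrays must outlive the matrix):

     PetscInt    i[]  = {0,1},j[]  = {0};
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1},oj[] = {ogcol};
     PetscScalar oa[] = {-1.0};
     Mat         mat;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
     ... use mat; i,j,a,oi,oj,oa must remain valid until after MatDestroy(&mat) ...
*/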
5766 
5767 /*
5768     Special version for direct calls from Fortran
5769 */
5770 #include <petsc/private/fortranimpl.h>
5771 
5772 /* Change these macros so they can be used in a void function */
5773 #undef CHKERRQ
5774 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5775 #undef SETERRQ2
5776 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5777 #undef SETERRQ3
5778 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5779 #undef SETERRQ
5780 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5781 
5782 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5783 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5784 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5785 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5786 #else
5787 #endif
5788 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5789 {
5790   Mat            mat  = *mmat;
5791   PetscInt       m    = *mm, n = *mn;
5792   InsertMode     addv = *maddv;
5793   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5794   PetscScalar    value;
5795   PetscErrorCode ierr;
5796 
5797   MatCheckPreallocated(mat,1);
5798   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5799 
5800 #if defined(PETSC_USE_DEBUG)
5801   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5802 #endif
5803   {
5804     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5805     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5806     PetscBool roworiented = aij->roworiented;
5807 
5808     /* Some Variables required in the macro */
5809     Mat        A                 = aij->A;
5810     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5811     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5812     MatScalar  *aa               = a->a;
5813     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5814     Mat        B                 = aij->B;
5815     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5816     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5817     MatScalar  *ba               = b->a;
5818 
5819     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5820     PetscInt  nonew = a->nonew;
5821     MatScalar *ap1,*ap2;
5822 
5823     PetscFunctionBegin;
5824     for (i=0; i<m; i++) {
5825       if (im[i] < 0) continue;
5826 #if defined(PETSC_USE_DEBUG)
5827       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5828 #endif
5829       if (im[i] >= rstart && im[i] < rend) {
5830         row      = im[i] - rstart;
5831         lastcol1 = -1;
5832         rp1      = aj + ai[row];
5833         ap1      = aa + ai[row];
5834         rmax1    = aimax[row];
5835         nrow1    = ailen[row];
5836         low1     = 0;
5837         high1    = nrow1;
5838         lastcol2 = -1;
5839         rp2      = bj + bi[row];
5840         ap2      = ba + bi[row];
5841         rmax2    = bimax[row];
5842         nrow2    = bilen[row];
5843         low2     = 0;
5844         high2    = nrow2;
5845 
5846         for (j=0; j<n; j++) {
5847           if (roworiented) value = v[i*n+j];
5848           else value = v[i+j*m];
5849           if (in[j] >= cstart && in[j] < cend) {
5850             col = in[j] - cstart;
5851             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5852             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5853           } else if (in[j] < 0) continue;
5854 #if defined(PETSC_USE_DEBUG)
5855           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5856           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5857 #endif
5858           else {
5859             if (mat->was_assembled) {
5860               if (!aij->colmap) {
5861                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5862               }
5863 #if defined(PETSC_USE_CTABLE)
5864               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5865               col--;
5866 #else
5867               col = aij->colmap[in[j]] - 1;
5868 #endif
5869               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5870               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5871                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5872                 col  =  in[j];
5873                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5874                 B     = aij->B;
5875                 b     = (Mat_SeqAIJ*)B->data;
5876                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; /* ba must be refreshed before ap2 is computed from it */
5877                 rp2   = bj + bi[row];
5878                 ap2   = ba + bi[row];
5879                 rmax2 = bimax[row];
5880                 nrow2 = bilen[row];
5881                 low2  = 0;
5882                 high2 = nrow2;
5883                 bm    = aij->B->rmap->n;
5885               }
5886             } else col = in[j];
5887             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5888           }
5889         }
5890       } else if (!aij->donotstash) {
5891         if (roworiented) {
5892           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5893         } else {
5894           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5895         }
5896       }
5897     }
5898   }
5899   PetscFunctionReturnVoid();
5900 }
5901