xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fccb18fbb4825433b897d8c2cb64f0bcfabccc52)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
24    automatically switches over to use inodes when enough exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
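/*
   Added usage sketch (not part of the original source): the pattern the note above recommends,
   with illustrative sizes and per-row nonzero estimates (100 global rows, 3 diagonal and 2
   off-diagonal nonzeros per row are assumptions, not values taken from this file).

     Mat         mat;
     PetscInt    i,rstart,rend;
     PetscScalar one = 1.0;

     MatCreate(PETSC_COMM_WORLD,&mat);
     MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(mat,MATAIJ);                          or use MatSetFromOptions() with -mat_type aij
     MatSeqAIJSetPreallocation(mat,3,NULL);           honored on a single-process communicator
     MatMPIAIJSetPreallocation(mat,3,NULL,2,NULL);    honored on a multi-process communicator
     MatGetOwnershipRange(mat,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       MatSetValues(mat,1,&i,1,&i,&one,INSERT_VALUES);
     }
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/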
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
218   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
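/*
   Added call sketch (inferred from the body below, not taken from other sources): gmat is a
   sequential AIJ matrix whose entries matter only on process 0, and mlocal is the number of rows
   this process is to own (the caller must make these sum to the global size).  The variable
   names comm, gmat, mlocal, and dist are illustrative assumptions.

     Mat dist;
     MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);
     (later, after only the numerical values of gmat have changed on process 0)
     MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);
*/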
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the number of diagonal and off-diagonal nonzeros in each row */
283       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the number of diagonal and off-diagonal nonzeros in each row */
316       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it, it is not scalable (each processor
407 has an order-N integer array, but access is fast).
408 */
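/*
   Added lookup sketch, mirroring how MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below consult
   the map this routine builds: gcol is a global column index and col becomes the corresponding
   local column of the off-diagonal block B, or a negative value when gcol is not present.

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&col);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/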
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) ap1[_i] += value;   \
444           else                    ap1[_i] = value; \
445           goto a_noinsert; \
446         } \
447       }  \
448       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
449       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
450       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
451       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
452       N = nrow1++ - 1; a->nz++; high1++; \
453       /* shift up all the later entries in this row */ \
454       for (ii=N; ii>=_i; ii--) { \
455         rp1[ii+1] = rp1[ii]; \
456         ap1[ii+1] = ap1[ii]; \
457       } \
458       rp1[_i] = col;  \
459       ap1[_i] = value;  \
460       A->nonzerostate++;\
461       a_noinsert: ; \
462       ailen[row] = nrow1; \
463 }
464 
465 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
466   { \
467     if (col <= lastcol2) low2 = 0;                        \
468     else high2 = nrow2;                                   \
469     lastcol2 = col;                                       \
470     while (high2-low2 > 5) {                              \
471       t = (low2+high2)/2;                                 \
472       if (rp2[t] > col) high2 = t;                        \
473       else             low2  = t;                         \
474     }                                                     \
475     for (_i=low2; _i<high2; _i++) {                       \
476       if (rp2[_i] > col) break;                           \
477       if (rp2[_i] == col) {                               \
478         if (addv == ADD_VALUES) ap2[_i] += value;         \
479         else                    ap2[_i] = value;          \
480         goto b_noinsert;                                  \
481       }                                                   \
482     }                                                     \
483     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
484     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
485     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
486     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
487     N = nrow2++ - 1; b->nz++; high2++;                    \
488     /* shift up all the later entries in this row */      \
489     for (ii=N; ii>=_i; ii--) {                            \
490       rp2[ii+1] = rp2[ii];                                \
491       ap2[ii+1] = ap2[ii];                                \
492     }                                                     \
493     rp2[_i] = col;                                        \
494     ap2[_i] = value;                                      \
495     B->nonzerostate++;                                    \
496     b_noinsert: ;                                         \
497     bilen[row] = nrow2;                                   \
498   }
499 
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
527 {
528   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
529   PetscScalar    value;
530   PetscErrorCode ierr;
531   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
532   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
533   PetscBool      roworiented = aij->roworiented;
534 
535   /* Some Variables required in the macro */
536   Mat        A                 = aij->A;
537   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
538   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
539   MatScalar  *aa               = a->a;
540   PetscBool  ignorezeroentries = a->ignorezeroentries;
541   Mat        B                 = aij->B;
542   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
543   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
544   MatScalar  *ba               = b->a;
545 
546   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
547   PetscInt  nonew;
548   MatScalar *ap1,*ap2;
549 
550   PetscFunctionBegin;
551   for (i=0; i<m; i++) {
552     if (im[i] < 0) continue;
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
555 #endif
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = aj + ai[row];
560       ap1      = aa + ai[row];
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = bj + bi[row];
567       ap2      = ba + bi[row];
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j=0; j<n; j++) {
574         if (roworiented) value = v[i*n+j];
575         else             value = v[i+j*m];
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
580           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
581         } else if (in[j] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
584 #endif
585         else {
586           if (mat->was_assembled) {
587             if (!aij->colmap) {
588               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
589             }
590 #if defined(PETSC_USE_CTABLE)
591             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
592             col--;
593 #else
594             col = aij->colmap[in[j]] - 1;
595 #endif
596             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
597               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
598               col  =  in[j];
599               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
600               B     = aij->B;
601               b     = (Mat_SeqAIJ*)B->data;
602               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
603               rp2   = bj + bi[row];
604               ap2   = ba + bi[row];
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0) {
612               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
613                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
614               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
619         }
620       }
621     } else {
622       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
638     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
639     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
640 */
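/*
   Added worked illustration (assumed layout): with cstart = 2 and cend = 5, a local row whose
   sorted global columns are {0, 2, 4, 7} is split by the loop below into the diagonal part,
   which stores the shifted local columns {0, 2} (2-cstart and 4-cstart), and the off-diagonal
   part, which keeps the global columns {0, 7}; ailen and bilen each record 2 entries for the row.
*/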
641 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
642 {
643   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
644   Mat            A           = aij->A; /* diagonal part of the matrix */
645   Mat            B           = aij->B; /* offdiagonal part of the matrix */
646   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
647   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
648   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
649   PetscInt       *ailen      = a->ilen,*aj = a->j;
650   PetscInt       *bilen      = b->ilen,*bj = b->j;
651   PetscInt       am          = aij->A->rmap->n,j;
652   PetscInt       diag_so_far = 0,dnz;
653   PetscInt       offd_so_far = 0,onz;
654 
655   PetscFunctionBegin;
656   /* Iterate over all rows of the matrix */
657   for (j=0; j<am; j++) {
658     dnz = onz = 0;
659     /*  Iterate over all non-zero columns of the current row */
660     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
661       /* If column is in the diagonal */
662       if (mat_j[col] >= cstart && mat_j[col] < cend) {
663         aj[diag_so_far++] = mat_j[col] - cstart;
664         dnz++;
665       } else { /* off-diagonal entries */
666         bj[offd_so_far++] = mat_j[col];
667         onz++;
668       }
669     }
670     ailen[j] = dnz;
671     bilen[j] = onz;
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 /*
677     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
678     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
679     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
680     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
681     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
682 */
683 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
684 {
685   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
686   Mat            A      = aij->A; /* diagonal part of the matrix */
687   Mat            B      = aij->B; /* offdiagonal part of the matrix */
688   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
689   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
690   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
691   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
692   PetscInt       *ailen = a->ilen,*aj = a->j;
693   PetscInt       *bilen = b->ilen,*bj = b->j;
694   PetscInt       am     = aij->A->rmap->n,j;
695   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
696   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
697   PetscScalar    *aa = a->a,*ba = b->a;
698 
699   PetscFunctionBegin;
700   /* Iterate over all rows of the matrix */
701   for (j=0; j<am; j++) {
702     dnz_row = onz_row = 0;
703     rowstart_offd = full_offd_i[j];
704     rowstart_diag = full_diag_i[j];
705     /*  Iterate over all non-zero columns of the current row */
706     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
707       /* If column is in the diagonal */
708       if (mat_j[col] >= cstart && mat_j[col] < cend) {
709         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
710         aa[rowstart_diag+dnz_row] = mat_a[col];
711         dnz_row++;
712       } else { /* off-diagonal entries */
713         bj[rowstart_offd+onz_row] = mat_j[col];
714         ba[rowstart_offd+onz_row] = mat_a[col];
715         onz_row++;
716       }
717     }
718     ailen[j] = dnz_row;
719     bilen[j] = onz_row;
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
725 {
726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
727   PetscErrorCode ierr;
728   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
729   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
730 
731   PetscFunctionBegin;
732   for (i=0; i<m; i++) {
733     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
734     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
735     if (idxm[i] >= rstart && idxm[i] < rend) {
736       row = idxm[i] - rstart;
737       for (j=0; j<n; j++) {
738         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
739         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
740         if (idxn[j] >= cstart && idxn[j] < cend) {
741           col  = idxn[j] - cstart;
742           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
743         } else {
744           if (!aij->colmap) {
745             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
746           }
747 #if defined(PETSC_USE_CTABLE)
748           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
749           col--;
750 #else
751           col = aij->colmap[idxn[j]] - 1;
752 #endif
753           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
754           else {
755             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
756           }
757         }
758       }
759     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
765 
766 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
767 {
768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
769   PetscErrorCode ierr;
770   PetscInt       nstash,reallocs;
771 
772   PetscFunctionBegin;
773   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
774 
775   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
776   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
777   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
782 {
783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
784   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
785   PetscErrorCode ierr;
786   PetscMPIInt    n;
787   PetscInt       i,j,rstart,ncols,flg;
788   PetscInt       *row,*col;
789   PetscBool      other_disassembled;
790   PetscScalar    *val;
791 
792   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
793 
794   PetscFunctionBegin;
795   if (!aij->donotstash && !mat->nooffprocentries) {
796     while (1) {
797       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
798       if (!flg) break;
799 
800       for (i=0; i<n; ) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         /* Now assemble all these values with a single function call */
808         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
809 
810         i = j;
811       }
812     }
813     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
814   }
815   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
816   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
817 
818   /* determine if any processor has disassembled, if so we must
819      also disassemble ourselves, in order that we may reassemble. */
820   /*
821      if nonzero structure of submatrix B cannot change then we know that
822      no processor disassembled thus we can skip this stuff
823   */
824   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
825     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
826     if (mat->was_assembled && !other_disassembled) {
827       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
828     }
829   }
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
831     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
832   }
833   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
834   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
835   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
836 
837   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
838 
839   aij->rowvalues = 0;
840 
841   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
842   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
843 
844   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
845   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
846     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
847     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
848   }
849   PetscFunctionReturn(0);
850 }
851 
852 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
859   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
864 {
865   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
866   PetscObjectState sA, sB;
867   PetscInt        *lrows;
868   PetscInt         r, len;
869   PetscBool        cong, lch, gch;
870   PetscErrorCode   ierr;
871 
872   PetscFunctionBegin;
873   /* get locally owned rows */
874   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
875   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
876   /* fix right hand side if needed */
877   if (x && b) {
878     const PetscScalar *xx;
879     PetscScalar       *bb;
880 
881     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
882     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
883     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
884     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
885     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
886     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
887   }
888 
889   sA = mat->A->nonzerostate;
890   sB = mat->B->nonzerostate;
891 
892   if (diag != 0.0 && cong) {
893     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
894     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
895   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
896     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
897     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
898     PetscInt   nnwA, nnwB;
899     PetscBool  nnzA, nnzB;
900 
901     nnwA = aijA->nonew;
902     nnwB = aijB->nonew;
903     nnzA = aijA->keepnonzeropattern;
904     nnzB = aijB->keepnonzeropattern;
905     if (!nnzA) {
906       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
907       aijA->nonew = 0;
908     }
909     if (!nnzB) {
910       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
911       aijB->nonew = 0;
912     }
913     /* Must zero here before the next loop */
914     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
915     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
916     for (r = 0; r < len; ++r) {
917       const PetscInt row = lrows[r] + A->rmap->rstart;
918       if (row >= A->cmap->N) continue;
919       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
920     }
921     aijA->nonew = nnwA;
922     aijB->nonew = nnwB;
923   } else {
924     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
925     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
926   }
927   ierr = PetscFree(lrows);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
930 
931   /* reduce nonzerostate */
932   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
933   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
934   if (gch) A->nonzerostate++;
935   PetscFunctionReturn(0);
936 }
937 
938 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
939 {
940   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
941   PetscErrorCode    ierr;
942   PetscMPIInt       n = A->rmap->n;
943   PetscInt          i,j,r,m,p = 0,len = 0;
944   PetscInt          *lrows,*owners = A->rmap->range;
945   PetscSFNode       *rrows;
946   PetscSF           sf;
947   const PetscScalar *xx;
948   PetscScalar       *bb,*mask;
949   Vec               xmask,lmask;
950   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
951   const PetscInt    *aj, *ii,*ridx;
952   PetscScalar       *aa;
953 
954   PetscFunctionBegin;
955   /* Create SF where leaves are input rows and roots are owned rows */
956   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
957   for (r = 0; r < n; ++r) lrows[r] = -1;
958   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
959   for (r = 0; r < N; ++r) {
960     const PetscInt idx   = rows[r];
961     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
962     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
963       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
964     }
965     rrows[r].rank  = p;
966     rrows[r].index = rows[r] - owners[p];
967   }
968   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
969   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
970   /* Collect flags for rows to be zeroed */
971   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
972   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
973   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
974   /* Compress and put in row numbers */
975   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
976   /* zero diagonal part of matrix */
977   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
978   /* handle off diagonal part of matrix */
979   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
980   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
981   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
982   for (i=0; i<len; i++) bb[lrows[i]] = 1;
983   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
984   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
987   if (x && b) { /* this code is buggy when the row and column layout don't match */
988     PetscBool cong;
989 
990     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
991     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
992     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
995     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
996   }
997   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
998   /* remove zeroed rows of off diagonal matrix */
999   ii = aij->i;
1000   for (i=0; i<len; i++) {
1001     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1002   }
1003   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1004   if (aij->compressedrow.use) {
1005     m    = aij->compressedrow.nrows;
1006     ii   = aij->compressedrow.i;
1007     ridx = aij->compressedrow.rindex;
1008     for (i=0; i<m; i++) {
1009       n  = ii[i+1] - ii[i];
1010       aj = aij->j + ii[i];
1011       aa = aij->a + ii[i];
1012 
1013       for (j=0; j<n; j++) {
1014         if (PetscAbsScalar(mask[*aj])) {
1015           if (b) bb[*ridx] -= *aa*xx[*aj];
1016           *aa = 0.0;
1017         }
1018         aa++;
1019         aj++;
1020       }
1021       ridx++;
1022     }
1023   } else { /* do not use compressed row format */
1024     m = l->B->rmap->n;
1025     for (i=0; i<m; i++) {
1026       n  = ii[i+1] - ii[i];
1027       aj = aij->j + ii[i];
1028       aa = aij->a + ii[i];
1029       for (j=0; j<n; j++) {
1030         if (PetscAbsScalar(mask[*aj])) {
1031           if (b) bb[i] -= *aa*xx[*aj];
1032           *aa = 0.0;
1033         }
1034         aa++;
1035         aj++;
1036       }
1037     }
1038   }
1039   if (x && b) {
1040     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1041     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1042   }
1043   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1044   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1045   ierr = PetscFree(lrows);CHKERRQ(ierr);
1046 
1047   /* only change matrix nonzero state if pattern was allowed to be changed */
1048   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1049     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1050     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1056 {
1057   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1058   PetscErrorCode ierr;
1059   PetscInt       nt;
1060   VecScatter     Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1064   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1065 
1066   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1067   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1068   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1074 {
1075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1076   PetscErrorCode ierr;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087   VecScatter     Mvctx = a->Mvctx;
1088 
1089   PetscFunctionBegin;
1090   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1091   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1093   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1094   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   /* do nondiagonal part */
1105   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1106   /* do local part */
1107   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1108   /* add partial results together */
1109   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1110   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1115 {
1116   MPI_Comm       comm;
1117   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1118   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1119   IS             Me,Notme;
1120   PetscErrorCode ierr;
1121   PetscInt       M,N,first,last,*notme,i;
1122   PetscBool      lf;
1123   PetscMPIInt    size;
1124 
1125   PetscFunctionBegin;
1126   /* Easy test: symmetric diagonal block */
1127   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1128   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1129   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1130   if (!*f) PetscFunctionReturn(0);
1131   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1132   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1133   if (size == 1) PetscFunctionReturn(0);
1134 
1135   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1136   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1137   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1138   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1139   for (i=0; i<first; i++) notme[i] = i;
1140   for (i=last; i<M; i++) notme[i-last+first] = i;
1141   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1142   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1143   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1144   Aoff = Aoffs[0];
1145   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1146   Boff = Boffs[0];
1147   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1148   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1149   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1150   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1151   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1152   ierr = PetscFree(notme);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1157 {
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1166 {
1167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1168   PetscErrorCode ierr;
1169 
1170   PetscFunctionBegin;
1171   /* do nondiagonal part */
1172   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1173   /* do local part */
1174   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1175   /* add partial results together */
1176   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1177   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 /*
1182   This only works correctly for square matrices where the subblock A->A is the
1183    diagonal block
1184 */
1185 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1186 {
1187   PetscErrorCode ierr;
1188   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1189 
1190   PetscFunctionBegin;
1191   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1192   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1193   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1204   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1209 {
1210   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214 #if defined(PETSC_USE_LOG)
1215   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1216 #endif
1217   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1218   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1219   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1220   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1221 #if defined(PETSC_USE_CTABLE)
1222   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1223 #else
1224   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1225 #endif
1226   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1227   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1228   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1229   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1230   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1231   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1232   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1233 
1234   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1243 #if defined(PETSC_HAVE_ELEMENTAL)
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1245 #endif
1246 #if defined(PETSC_HAVE_HYPRE)
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1249 #endif
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1259   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1260   PetscErrorCode ierr;
1261   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1262   int            fd;
1263   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1264   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1265   PetscScalar    *column_values;
1266   PetscInt       message_count,flowcontrolcount;
1267   FILE           *file;
1268 
1269   PetscFunctionBegin;
1270   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1272   nz   = A->nz + B->nz;
1273   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1274   if (!rank) {
1275     header[0] = MAT_FILE_CLASSID;
1276     header[1] = mat->rmap->N;
1277     header[2] = mat->cmap->N;
1278 
1279     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     /* get largest number of rows any processor has */
1282     rlen  = mat->rmap->n;
1283     range = mat->rmap->range;
1284     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1285   } else {
1286     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     rlen = mat->rmap->n;
1288   }
1289 
1290   /* load up the local row counts */
1291   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1292   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1293 
1294   /* store the row lengths to the file */
1295   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1296   if (!rank) {
1297     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       rlen = range[i+1] - range[i];
1301       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1303     }
1304     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1305   } else {
1306     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1307     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1308     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1309   }
1310   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1311 
1312   /* load up the local column indices */
1313   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1314   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1316   cnt   = 0;
1317   for (i=0; i<mat->rmap->n; i++) {
1318     for (j=B->i[i]; j<B->i[i+1]; j++) {
1319       if ((col = garray[B->j[j]]) > cstart) break;
1320       column_indices[cnt++] = col;
1321     }
1322     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1323     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1324   }
1325   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1326 
1327   /* store the column indices to the file */
1328   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1329   if (!rank) {
1330     MPI_Status status;
1331     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1335       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1336       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1337       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1338     }
1339     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1340   } else {
1341     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1342     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1343     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1347 
1348   /* load up the local column values */
1349   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1350   cnt  = 0;
1351   for (i=0; i<mat->rmap->n; i++) {
1352     for (j=B->i[i]; j<B->i[i+1]; j++) {
1353       if (garray[B->j[j]] > cstart) break;
1354       column_values[cnt++] = B->a[j];
1355     }
1356     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1357     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1358   }
1359   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1360 
1361   /* store the column values to the file */
1362   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1363   if (!rank) {
1364     MPI_Status status;
1365     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1366     for (i=1; i<size; i++) {
1367       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1368       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1369       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1370       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1371       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1372     }
1373     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1374   } else {
1375     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1376     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1379   }
1380   ierr = PetscFree(column_values);CHKERRQ(ierr);
1381 
1382   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1383   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1384   PetscFunctionReturn(0);
1385 }
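
/*
   A minimal usage sketch of the binary format written above, assuming an assembled MPIAIJ
   matrix A on communicator comm and a placeholder file name "A.dat":

     PetscViewer viewer;
     Mat         B;

     ierr = PetscViewerBinaryOpen(comm,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(comm,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/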
1386 
1387 #include <petscdraw.h>
1388 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1389 {
1390   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1391   PetscErrorCode    ierr;
1392   PetscMPIInt       rank = aij->rank,size = aij->size;
1393   PetscBool         isdraw,iascii,isbinary;
1394   PetscViewer       sviewer;
1395   PetscViewerFormat format;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1401   if (iascii) {
1402     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1403     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1404       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1405       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1406       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1407       for (i=0; i<(PetscInt)size; i++) {
1408         nmax = PetscMax(nmax,nz[i]);
1409         nmin = PetscMin(nmin,nz[i]);
1410         navg += nz[i];
1411       }
1412       ierr = PetscFree(nz);CHKERRQ(ierr);
1413       navg = navg/size;
1414       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1415       PetscFunctionReturn(0);
1416     }
1417     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1418     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1419       MatInfo   info;
1420       PetscBool inodes;
1421 
1422       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1423       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1424       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1425       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1426       if (!inodes) {
1427         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1428                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1429       } else {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       }
1433       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1434       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1435       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1436       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1437       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1438       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1440       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1441       PetscFunctionReturn(0);
1442     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1443       PetscInt inodecount,inodelimit,*inodes;
1444       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1445       if (inodes) {
1446         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1447       } else {
1448         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1449       }
1450       PetscFunctionReturn(0);
1451     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1452       PetscFunctionReturn(0);
1453     }
1454   } else if (isbinary) {
1455     if (size == 1) {
1456       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1457       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1458     } else {
1459       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1460     }
1461     PetscFunctionReturn(0);
1462   } else if (iascii && size == 1) {
1463     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1464     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1465     PetscFunctionReturn(0);
1466   } else if (isdraw) {
1467     PetscDraw draw;
1468     PetscBool isnull;
1469     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1470     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1471     if (isnull) PetscFunctionReturn(0);
1472   }
1473 
1474   { /* assemble the entire matrix onto first processor */
1475     Mat A = NULL, Av;
1476     IS  isrow,iscol;
1477 
1478     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1479     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1480     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1481     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1482 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1483 /*
1484     Mat *AA, A = NULL, Av;
1485     IS  isrow,iscol;
1486 
1487     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1488     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1489     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1490     if (!rank) {
1491        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1492        A    = AA[0];
1493        Av   = AA[0];
1494     }
1495     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1496 */
1497     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1498     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1499     /*
1500        Every process has to participate in drawing the matrix since the graphics waits are
1501        synchronized across all processes that share the PetscDraw object
1502     */
1503     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1504     if (!rank) {
1505       if (((PetscObject)mat)->name) {
1506         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       }
1508       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1509     }
1510     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1511     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1512     ierr = MatDestroy(&A);CHKERRQ(ierr);
1513   }
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1518 {
1519   PetscErrorCode ierr;
1520   PetscBool      iascii,isdraw,issocket,isbinary;
1521 
1522   PetscFunctionBegin;
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1527   if (iascii || isdraw || isbinary || issocket) {
1528     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1534 {
1535   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1536   PetscErrorCode ierr;
1537   Vec            bb1 = 0;
1538   PetscBool      hasop;
1539 
1540   PetscFunctionBegin;
1541   if (flag == SOR_APPLY_UPPER) {
1542     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1543     PetscFunctionReturn(0);
1544   }
1545 
1546   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1547     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1548   }
1549 
1550   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1551     if (flag & SOR_ZERO_INITIAL_GUESS) {
1552       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1553       its--;
1554     }
1555 
1556     while (its--) {
1557       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1559 
1560       /* update rhs: bb1 = bb - B*x */
1561       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1562       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1563 
1564       /* local sweep */
1565       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1566     }
1567   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1568     if (flag & SOR_ZERO_INITIAL_GUESS) {
1569       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1570       its--;
1571     }
1572     while (its--) {
1573       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1575 
1576       /* update rhs: bb1 = bb - B*x */
1577       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1578       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1579 
1580       /* local sweep */
1581       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1582     }
1583   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1584     if (flag & SOR_ZERO_INITIAL_GUESS) {
1585       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1586       its--;
1587     }
1588     while (its--) {
1589       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591 
1592       /* update rhs: bb1 = bb - B*x */
1593       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1594       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1595 
1596       /* local sweep */
1597       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1598     }
1599   } else if (flag & SOR_EISENSTAT) {
1600     Vec xx1;
1601 
1602     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1603     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1604 
1605     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     if (!mat->diag) {
1608       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1609       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1610     }
1611     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1612     if (hasop) {
1613       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1614     } else {
1615       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1616     }
1617     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1618 
1619     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1620 
1621     /* local sweep */
1622     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1623     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1624     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1625   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1626 
1627   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1628 
1629   matin->factorerrortype = mat->A->factorerrortype;
1630   PetscFunctionReturn(0);
1631 }
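
/*
   A minimal usage sketch of the local SOR implemented above, assuming an assembled MPIAIJ
   matrix A and conforming vectors b and x:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   The same sweep is selected at the solver level with -pc_type sor, since PCSOR applies its
   smoother through MatSOR().
*/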
1632 
1633 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1634 {
1635   Mat            aA,aB,Aperm;
1636   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1637   PetscScalar    *aa,*ba;
1638   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1639   PetscSF        rowsf,sf;
1640   IS             parcolp = NULL;
1641   PetscBool      done;
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1646   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1647   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1648   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1649 
1650   /* Invert row permutation to find out where my rows should go */
1651   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1652   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1653   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1655   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1657 
1658   /* Invert column permutation to find out where my columns should go */
1659   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1660   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1661   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1662   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1663   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1665   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1666 
1667   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1668   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1669   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1670 
1671   /* Find out where my gcols should go */
1672   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1673   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1674   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1675   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1676   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1677   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1679   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1680 
1681   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1683   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1684   for (i=0; i<m; i++) {
1685     PetscInt row = rdest[i],rowner;
1686     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1687     for (j=ai[i]; j<ai[i+1]; j++) {
1688       PetscInt cowner,col = cdest[aj[j]];
1689       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1690       if (rowner == cowner) dnnz[i]++;
1691       else onnz[i]++;
1692     }
1693     for (j=bi[i]; j<bi[i+1]; j++) {
1694       PetscInt cowner,col = gcdest[bj[j]];
1695       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1696       if (rowner == cowner) dnnz[i]++;
1697       else onnz[i]++;
1698     }
1699   }
1700   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1704   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1705 
1706   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1708   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1709   for (i=0; i<m; i++) {
1710     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1711     PetscInt j0,rowlen;
1712     rowlen = ai[i+1] - ai[i];
1713     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed the number of rows m (the scratch array length), so insert in batches of at most m */
1714       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1715       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1716     }
1717     rowlen = bi[i+1] - bi[i];
1718     for (j0=j=0; j<rowlen; j0=j) {
1719       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1720       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1721     }
1722   }
1723   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1726   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1728   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1729   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1730   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1731   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1732   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1733   *B = Aperm;
1734   PetscFunctionReturn(0);
1735 }
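
/*
   A minimal usage sketch of MatPermute() on a parallel AIJ matrix, using identity permutations
   built with ISCreateStride() over the locally owned rows and columns; a real reordering would
   typically come from MatGetOrdering():

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend,cstart,cend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/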
1736 
1737 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1738 {
1739   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1744   if (ghosts) *ghosts = aij->garray;
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1749 {
1750   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1751   Mat            A    = mat->A,B = mat->B;
1752   PetscErrorCode ierr;
1753   PetscReal      isend[5],irecv[5];
1754 
1755   PetscFunctionBegin;
1756   info->block_size = 1.0;
1757   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1758 
1759   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1760   isend[3] = info->memory;  isend[4] = info->mallocs;
1761 
1762   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1763 
1764   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1765   isend[3] += info->memory;  isend[4] += info->mallocs;
1766   if (flag == MAT_LOCAL) {
1767     info->nz_used      = isend[0];
1768     info->nz_allocated = isend[1];
1769     info->nz_unneeded  = isend[2];
1770     info->memory       = isend[3];
1771     info->mallocs      = isend[4];
1772   } else if (flag == MAT_GLOBAL_MAX) {
1773     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1774 
1775     info->nz_used      = irecv[0];
1776     info->nz_allocated = irecv[1];
1777     info->nz_unneeded  = irecv[2];
1778     info->memory       = irecv[3];
1779     info->mallocs      = irecv[4];
1780   } else if (flag == MAT_GLOBAL_SUM) {
1781     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1782 
1783     info->nz_used      = irecv[0];
1784     info->nz_allocated = irecv[1];
1785     info->nz_unneeded  = irecv[2];
1786     info->memory       = irecv[3];
1787     info->mallocs      = irecv[4];
1788   }
1789   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1790   info->fill_ratio_needed = 0;
1791   info->factor_mallocs    = 0;
1792   PetscFunctionReturn(0);
1793 }
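
/*
   A minimal usage sketch of the global reductions performed above, assuming an assembled
   MPIAIJ matrix A:

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g nz allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/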
1794 
1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1796 {
1797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   switch (op) {
1802   case MAT_NEW_NONZERO_LOCATIONS:
1803   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805   case MAT_KEEP_NONZERO_PATTERN:
1806   case MAT_NEW_NONZERO_LOCATION_ERR:
1807   case MAT_USE_INODES:
1808   case MAT_IGNORE_ZERO_ENTRIES:
1809     MatCheckPreallocated(A,1);
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_ROW_ORIENTED:
1814     MatCheckPreallocated(A,1);
1815     a->roworiented = flg;
1816 
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_NEW_DIAGONALS:
1821     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1822     break;
1823   case MAT_IGNORE_OFF_PROC_ENTRIES:
1824     a->donotstash = flg;
1825     break;
1826   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1827   case MAT_SPD:
1828   case MAT_SYMMETRIC:
1829   case MAT_STRUCTURALLY_SYMMETRIC:
1830   case MAT_HERMITIAN:
1831   case MAT_SYMMETRY_ETERNAL:
1832     break;
1833   case MAT_SUBMAT_SINGLEIS:
1834     A->submat_singleis = flg;
1835     break;
1836   case MAT_STRUCTURE_ONLY:
1837     /* The option is handled directly by MatSetOption() */
1838     break;
1839   default:
1840     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1841   }
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1848   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1849   PetscErrorCode ierr;
1850   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1851   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1852   PetscInt       *cmap,*idx_p;
1853 
1854   PetscFunctionBegin;
1855   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1856   mat->getrowactive = PETSC_TRUE;
1857 
1858   if (!mat->rowvalues && (idx || v)) {
1859     /*
1860         allocate enough space to hold information from the longest row.
1861     */
1862     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1863     PetscInt   max = 1,tmp;
1864     for (i=0; i<matin->rmap->n; i++) {
1865       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1866       if (max < tmp) max = tmp;
1867     }
1868     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1869   }
1870 
1871   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1872   lrow = row - rstart;
1873 
1874   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1875   if (!v)   {pvA = 0; pvB = 0;}
1876   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1877   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   nztot = nzA + nzB;
1880 
1881   cmap = mat->garray;
1882   if (v  || idx) {
1883     if (nztot) {
1884       /* Sort by increasing column numbers, assuming A and B already sorted */
1885       PetscInt imark = -1;
1886       if (v) {
1887         *v = v_p = mat->rowvalues;
1888         for (i=0; i<nzB; i++) {
1889           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1890           else break;
1891         }
1892         imark = i;
1893         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1894         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1895       }
1896       if (idx) {
1897         *idx = idx_p = mat->rowindices;
1898         if (imark > -1) {
1899           for (i=0; i<imark; i++) {
1900             idx_p[i] = cmap[cworkB[i]];
1901           }
1902         } else {
1903           for (i=0; i<nzB; i++) {
1904             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1905             else break;
1906           }
1907           imark = i;
1908         }
1909         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1910         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1911       }
1912     } else {
1913       if (idx) *idx = 0;
1914       if (v)   *v   = 0;
1915     }
1916   }
1917   *nz  = nztot;
1918   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1919   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1924 {
1925   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1926 
1927   PetscFunctionBegin;
1928   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1929   aij->getrowactive = PETSC_FALSE;
1930   PetscFunctionReturn(0);
1931 }
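
/*
   A minimal usage sketch of the MatGetRow()/MatRestoreRow() pair implemented above; only rows
   in the local ownership range [rstart,rend) may be requested on a given process:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols and vals ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/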
1932 
1933 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1934 {
1935   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1936   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1937   PetscErrorCode ierr;
1938   PetscInt       i,j,cstart = mat->cmap->rstart;
1939   PetscReal      sum = 0.0;
1940   MatScalar      *v;
1941 
1942   PetscFunctionBegin;
1943   if (aij->size == 1) {
1944     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1945   } else {
1946     if (type == NORM_FROBENIUS) {
1947       v = amat->a;
1948       for (i=0; i<amat->nz; i++) {
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950       }
1951       v = bmat->a;
1952       for (i=0; i<bmat->nz; i++) {
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954       }
1955       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1956       *norm = PetscSqrtReal(*norm);
1957       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1958     } else if (type == NORM_1) { /* max column norm */
1959       PetscReal *tmp,*tmp2;
1960       PetscInt  *jj,*garray = aij->garray;
1961       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1962       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1963       *norm = 0.0;
1964       v     = amat->a; jj = amat->j;
1965       for (j=0; j<amat->nz; j++) {
1966         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1967       }
1968       v = bmat->a; jj = bmat->j;
1969       for (j=0; j<bmat->nz; j++) {
1970         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1971       }
1972       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1973       for (j=0; j<mat->cmap->N; j++) {
1974         if (tmp2[j] > *norm) *norm = tmp2[j];
1975       }
1976       ierr = PetscFree(tmp);CHKERRQ(ierr);
1977       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1978       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1979     } else if (type == NORM_INFINITY) { /* max row norm */
1980       PetscReal ntemp = 0.0;
1981       for (j=0; j<aij->A->rmap->n; j++) {
1982         v   = amat->a + amat->i[j];
1983         sum = 0.0;
1984         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1985           sum += PetscAbsScalar(*v); v++;
1986         }
1987         v = bmat->a + bmat->i[j];
1988         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1989           sum += PetscAbsScalar(*v); v++;
1990         }
1991         if (sum > ntemp) ntemp = sum;
1992       }
1993       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1996   }
1997   PetscFunctionReturn(0);
1998 }
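
/*
   A minimal usage sketch of the three norms supported above (NORM_2 is not available for
   parallel AIJ matrices):

     PetscReal nrm1,nrmf,nrminf;

     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);            largest column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);   largest row sum
*/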
1999 
2000 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2001 {
2002   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2003   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2004   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2005   PetscErrorCode ierr;
2006   Mat            B,A_diag,*B_diag;
2007   MatScalar      *array;
2008 
2009   PetscFunctionBegin;
2010   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2011   ai = Aloc->i; aj = Aloc->j;
2012   bi = Bloc->i; bj = Bloc->j;
2013   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2014     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2015     PetscSFNode          *oloc;
2016     PETSC_UNUSED PetscSF sf;
2017 
2018     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2019     /* compute d_nnz for preallocation */
2020     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++) {
2022       d_nnz[aj[i]]++;
2023     }
2024     /* compute local off-diagonal contributions */
2025     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2026     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2027     /* map those to global */
2028     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2029     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2030     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2031     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2032     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2034     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2035 
2036     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2037     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2038     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2039     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2040     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2041     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2042   } else {
2043     B    = *matout;
2044     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2045   }
2046 
2047   b           = (Mat_MPIAIJ*)B->data;
2048   A_diag      = a->A;
2049   B_diag      = &b->A;
2050   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2051   A_diag_ncol = A_diag->cmap->N;
2052   B_diag_ilen = sub_B_diag->ilen;
2053   B_diag_i    = sub_B_diag->i;
2054 
2055   /* Set ilen for diagonal of B */
2056   for (i=0; i<A_diag_ncol; i++) {
2057     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2058   }
2059 
2060   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2061   very quickly (i.e., without using MatSetValues()), because all writes are local. */
2062   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2063 
2064   /* copy over the B part */
2065   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2066   array = Bloc->a;
2067   row   = A->rmap->rstart;
2068   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2069   cols_tmp = cols;
2070   for (i=0; i<mb; i++) {
2071     ncol = bi[i+1]-bi[i];
2072     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2073     row++;
2074     array += ncol; cols_tmp += ncol;
2075   }
2076   ierr = PetscFree(cols);CHKERRQ(ierr);
2077 
2078   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2081     *matout = B;
2082   } else {
2083     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
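
/*
   A minimal usage sketch of the transposes handled above:

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    creates At = A^T
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);     replaces A by A^T
*/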
2087 
2088 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2089 {
2090   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2091   Mat            a    = aij->A,b = aij->B;
2092   PetscErrorCode ierr;
2093   PetscInt       s1,s2,s3;
2094 
2095   PetscFunctionBegin;
2096   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2097   if (rr) {
2098     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2099     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2100     /* Overlap communication with computation. */
2101     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102   }
2103   if (ll) {
2104     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2105     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2106     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2107   }
2108   /* scale the diagonal block */
2109   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2110 
2111   if (rr) {
2112     /* Do a scatter end and then right scale the off-diagonal block */
2113     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2115   }
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2120 {
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   PetscErrorCode ierr;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2130 {
2131   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2132   Mat            a,b,c,d;
2133   PetscBool      flg;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   a = matA->A; b = matA->B;
2138   c = matB->A; d = matB->B;
2139 
2140   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2141   if (flg) {
2142     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2143   }
2144   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2149 {
2150   PetscErrorCode ierr;
2151   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2152   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2153 
2154   PetscFunctionBegin;
2155   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2156   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2157     /* because of the column compression in the off-processor part of the matrix a->B,
2158        the number of columns in a->B and b->B may be different, hence we cannot call
2159        the MatCopy() directly on the two parts. If need be, we can provide a more
2160        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2161        then copying the submatrices */
2162     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2163   } else {
2164     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2165     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2166   }
2167   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
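
/*
   A minimal usage sketch of the fast path above: B must already have the same nonzero pattern
   as A, for example when it was obtained with MatDuplicate():

     Mat B;

     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/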
2170 
2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2172 {
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /*
2181    Computes the number of nonzeros per row needed for preallocation when X and Y
2182    have different nonzero structure.
2183 */
2184 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2185 {
2186   PetscInt       i,j,k,nzx,nzy;
2187 
2188   PetscFunctionBegin;
2189   /* Set the number of nonzeros in the new matrix */
2190   for (i=0; i<m; i++) {
2191     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2192     nzx = xi[i+1] - xi[i];
2193     nzy = yi[i+1] - yi[i];
2194     nnz[i] = 0;
2195     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2196       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2197       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2198       nnz[i]++;
2199     }
2200     for (; k<nzy; k++) nnz[i]++;
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2206 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2207 {
2208   PetscErrorCode ierr;
2209   PetscInt       m = Y->rmap->N;
2210   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2211   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2212 
2213   PetscFunctionBegin;
2214   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2219 {
2220   PetscErrorCode ierr;
2221   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2222   PetscBLASInt   bnz,one=1;
2223   Mat_SeqAIJ     *x,*y;
2224 
2225   PetscFunctionBegin;
2226   if (str == SAME_NONZERO_PATTERN) {
2227     PetscScalar alpha = a;
2228     x    = (Mat_SeqAIJ*)xx->A->data;
2229     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2230     y    = (Mat_SeqAIJ*)yy->A->data;
2231     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2232     x    = (Mat_SeqAIJ*)xx->B->data;
2233     y    = (Mat_SeqAIJ*)yy->B->data;
2234     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2235     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2236     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2237   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2238     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2239   } else {
2240     Mat      B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2248     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2251     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2252     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2253     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
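
/*
   A minimal usage sketch of Y = Y + a*X with the structure flags dispatched above:
   SAME_NONZERO_PATTERN takes the BLAS axpy fast path, SUBSET_NONZERO_PATTERN falls back to
   MatAXPY_Basic(), and DIFFERENT_NONZERO_PATTERN recomputes the preallocation of Y:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/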
2259 
2260 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2261 
2262 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2263 {
2264 #if defined(PETSC_USE_COMPLEX)
2265   PetscErrorCode ierr;
2266   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2270   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2271 #else
2272   PetscFunctionBegin;
2273 #endif
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2278 {
2279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2284   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2295   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303   PetscInt       i,*idxb = 0;
2304   PetscScalar    *va,*vb;
2305   Vec            vtmp;
2306 
2307   PetscFunctionBegin;
2308   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2309   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2310   if (idx) {
2311     for (i=0; i<A->rmap->n; i++) {
2312       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2313     }
2314   }
2315 
2316   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2317   if (idx) {
2318     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2319   }
2320   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2321   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2322 
2323   for (i=0; i<A->rmap->n; i++) {
2324     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2325       va[i] = vb[i];
2326       if (idx) idx[i] = a->garray[idxb[i]];
2327     }
2328   }
2329 
2330   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2331   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2332   ierr = PetscFree(idxb);CHKERRQ(ierr);
2333   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338 {
2339   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,*idxb = 0;
2342   PetscScalar    *va,*vb;
2343   Vec            vtmp;
2344 
2345   PetscFunctionBegin;
2346   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2347   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2348   if (idx) {
2349     for (i=0; i<A->rmap->n; i++) {
2350       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2351     }
2352   }
2353 
2354   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2355   if (idx) {
2356     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2357   }
2358   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2359   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2360 
2361   for (i=0; i<A->rmap->n; i++) {
2362     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2363       va[i] = vb[i];
2364       if (idx) idx[i] = a->garray[idxb[i]];
2365     }
2366   }
2367 
2368   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2369   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2370   ierr = PetscFree(idxb);CHKERRQ(ierr);
2371   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2378   PetscInt       n      = A->rmap->n;
2379   PetscInt       cstart = A->cmap->rstart;
2380   PetscInt       *cmap  = mat->garray;
2381   PetscInt       *diagIdx, *offdiagIdx;
2382   Vec            diagV, offdiagV;
2383   PetscScalar    *a, *diagA, *offdiagA;
2384   PetscInt       r;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2394   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2395   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < n; ++r) {
2397     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2398       a[r]   = diagA[r];
2399       idx[r] = cstart + diagIdx[r];
2400     } else {
2401       a[r]   = offdiagA[r];
2402       idx[r] = cmap[offdiagIdx[r]];
2403     }
2404   }
2405   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2415 {
2416   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2417   PetscInt       n      = A->rmap->n;
2418   PetscInt       cstart = A->cmap->rstart;
2419   PetscInt       *cmap  = mat->garray;
2420   PetscInt       *diagIdx, *offdiagIdx;
2421   Vec            diagV, offdiagV;
2422   PetscScalar    *a, *diagA, *offdiagA;
2423   PetscInt       r;
2424   PetscErrorCode ierr;
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2429   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2431   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2432   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2433   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2434   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2435   for (r = 0; r < n; ++r) {
2436     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2437       a[r]   = diagA[r];
2438       idx[r] = cstart + diagIdx[r];
2439     } else {
2440       a[r]   = offdiagA[r];
2441       idx[r] = cmap[offdiagIdx[r]];
2442     }
2443   }
2444   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2446   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2449   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2454 {
2455   PetscErrorCode ierr;
2456   Mat            *dummy;
2457 
2458   PetscFunctionBegin;
2459   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2460   *newmat = *dummy;
2461   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2466 {
2467   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2468   PetscErrorCode ierr;
2469 
2470   PetscFunctionBegin;
2471   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2472   A->factorerrortype = a->A->factorerrortype;
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2480 
2481   PetscFunctionBegin;
2482   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2483   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2484   if (x->assembled) {
2485     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2486   } else {
2487     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2488   }
2489   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2490   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2495 {
2496   PetscFunctionBegin;
2497   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2498   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 /*@
2503    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2504 
2505    Collective on Mat
2506 
2507    Input Parameters:
2508 +    A - the matrix
2509 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2510 
2511  Level: advanced
2512 
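   Example usage (a minimal sketch, assuming "is" holds the n index sets whose overlap is to be grown):
.vb
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,n,is,1);CHKERRQ(ierr);
.ve

.seealso: MatIncreaseOverlap()
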
2513 @*/
2514 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2515 {
2516   PetscErrorCode       ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2524 {
2525   PetscErrorCode       ierr;
2526   PetscBool            sc = PETSC_FALSE,flg;
2527 
2528   PetscFunctionBegin;
2529   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2530   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2531   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2532   if (flg) {
2533     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2534   }
2535   ierr = PetscOptionsTail();CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2540 {
2541   PetscErrorCode ierr;
2542   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2543   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2544 
2545   PetscFunctionBegin;
2546   if (!Y->preallocated) {
2547     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2548   } else if (!aij->nz) {
2549     PetscInt nonew = aij->nonew;
2550     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2551     aij->nonew = nonew;
2552   }
2553   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
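
/*
   Usage sketch: MatShift() computes Y <- Y + a*I, e.g.

     ierr = MatShift(Y,(PetscScalar)1.0);CHKERRQ(ierr);

   If Y was never preallocated, the routine above first installs a minimal
   one-nonzero-per-row preallocation so the diagonal entries can be inserted.
*/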
2556 
2557 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2558 {
2559   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2560   PetscErrorCode ierr;
2561 
2562   PetscFunctionBegin;
2563   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2564   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2565   if (d) {
2566     PetscInt rstart;
2567     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2568     *d += rstart;
2569 
2570   }
2571   PetscFunctionReturn(0);
2572 }
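
/*
   Usage sketch (assumes A is a square, assembled MATMPIAIJ matrix):

     PetscBool missing;
     PetscInt  d;
     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);

   On return d is the global row index of a missing diagonal entry on this process:
   the result from the local diagonal block shifted by the ownership range, as above.
*/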
2573 
2574 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2575 {
2576   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /* -------------------------------------------------------------------*/
2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2586                                        MatGetRow_MPIAIJ,
2587                                        MatRestoreRow_MPIAIJ,
2588                                        MatMult_MPIAIJ,
2589                                 /* 4*/ MatMultAdd_MPIAIJ,
2590                                        MatMultTranspose_MPIAIJ,
2591                                        MatMultTransposeAdd_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*10*/ 0,
2596                                        0,
2597                                        0,
2598                                        MatSOR_MPIAIJ,
2599                                        MatTranspose_MPIAIJ,
2600                                 /*15*/ MatGetInfo_MPIAIJ,
2601                                        MatEqual_MPIAIJ,
2602                                        MatGetDiagonal_MPIAIJ,
2603                                        MatDiagonalScale_MPIAIJ,
2604                                        MatNorm_MPIAIJ,
2605                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2606                                        MatAssemblyEnd_MPIAIJ,
2607                                        MatSetOption_MPIAIJ,
2608                                        MatZeroEntries_MPIAIJ,
2609                                 /*24*/ MatZeroRows_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*29*/ MatSetUp_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        MatGetDiagonalBlock_MPIAIJ,
2618                                        0,
2619                                 /*34*/ MatDuplicate_MPIAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                 /*39*/ MatAXPY_MPIAIJ,
2625                                        MatCreateSubMatrices_MPIAIJ,
2626                                        MatIncreaseOverlap_MPIAIJ,
2627                                        MatGetValues_MPIAIJ,
2628                                        MatCopy_MPIAIJ,
2629                                 /*44*/ MatGetRowMax_MPIAIJ,
2630                                        MatScale_MPIAIJ,
2631                                        MatShift_MPIAIJ,
2632                                        MatDiagonalSet_MPIAIJ,
2633                                        MatZeroRowsColumns_MPIAIJ,
2634                                 /*49*/ MatSetRandom_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2640                                        0,
2641                                        MatSetUnfactored_MPIAIJ,
2642                                        MatPermute_MPIAIJ,
2643                                        0,
2644                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2645                                        MatDestroy_MPIAIJ,
2646                                        MatView_MPIAIJ,
2647                                        0,
2648                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2650                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655                                        MatGetRowMinAbs_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*75*/ MatFDColoringApply_AIJ,
2661                                        MatSetFromOptions_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        MatFindZeroDiagonals_MPIAIJ,
2665                                 /*80*/ 0,
2666                                        0,
2667                                        0,
2668                                 /*83*/ MatLoad_MPIAIJ,
2669                                        MatIsSymmetric_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2675                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2676                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2677                                        MatPtAP_MPIAIJ_MPIAIJ,
2678                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2679                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                 /*99*/ 0,
2685                                        0,
2686                                        0,
2687                                        MatConjugate_MPIAIJ,
2688                                        0,
2689                                 /*104*/MatSetValuesRow_MPIAIJ,
2690                                        MatRealPart_MPIAIJ,
2691                                        MatImaginaryPart_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                 /*109*/0,
2695                                        0,
2696                                        MatGetRowMin_MPIAIJ,
2697                                        0,
2698                                        MatMissingDiagonal_MPIAIJ,
2699                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2700                                        0,
2701                                        MatGetGhosts_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                 /*119*/0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        MatGetMultiProcBlock_MPIAIJ,
2709                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2710                                        MatGetColumnNorms_MPIAIJ,
2711                                        MatInvertBlockDiagonal_MPIAIJ,
2712                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2713                                        MatCreateSubMatricesMPI_MPIAIJ,
2714                                 /*129*/0,
2715                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2716                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2717                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2718                                        0,
2719                                 /*134*/0,
2720                                        0,
2721                                        MatRARt_MPIAIJ_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                 /*139*/MatSetBlockSizes_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                        MatFDColoringSetUp_MPIXAIJ,
2728                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2729                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2730 };
2731 
2732 /* ----------------------------------------------------------------------------------------*/
2733 
2734 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2735 {
2736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2741   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
2744 
2745 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
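
/*
   Usage sketch for the store/retrieve pair above (assumes mat is an assembled
   MATMPIAIJ matrix whose nonzero pattern will not change):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        ... modify the matrix values, e.g. add a nonlinear contribution ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   Both routines simply forward to the sequential diagonal (A) and off-diagonal (B)
   blocks of the MPIAIJ matrix.
*/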
2755 
2756 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2757 {
2758   Mat_MPIAIJ     *b;
2759   PetscErrorCode ierr;
2760   PetscMPIInt    size;
2761 
2762   PetscFunctionBegin;
2763   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2765   b = (Mat_MPIAIJ*)B->data;
2766 
2767 #if defined(PETSC_USE_CTABLE)
2768   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2769 #else
2770   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2771 #endif
2772   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2773   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2774   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2775 
2776   /* Because B may have been resized we simply destroy it and create a new one each time */
2777   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2778   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2779   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2780   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2781   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2782   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2783   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2784 
2785   if (!B->preallocated) {
2786     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2787     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2788     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2791   }
2792 
2793   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2794   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2795   B->preallocated  = PETSC_TRUE;
2796   B->was_assembled = PETSC_FALSE;
2797   B->assembled     = PETSC_FALSE;
2798   PetscFunctionReturn(0);
2799 }
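
/*
   Usage sketch (illustrative sizes only): a user reaches the routine above through
   MatMPIAIJSetPreallocation(), e.g. for a 100x100 matrix with at most 5 nonzeros per
   row in the diagonal block and 2 in the off-diagonal block

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Exact per-row counts can be supplied through the d_nnz and o_nnz arrays instead of
   the d_nz/o_nz scalars.
*/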
2800 
2801 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2802 {
2803   Mat_MPIAIJ     *b;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2808   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2809   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2810   b = (Mat_MPIAIJ*)B->data;
2811 
2812 #if defined(PETSC_USE_CTABLE)
2813   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2814 #else
2815   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2816 #endif
2817   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2818   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2819   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2820 
2821   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2822   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2823   B->preallocated  = PETSC_TRUE;
2824   B->was_assembled = PETSC_FALSE;
2825   B->assembled = PETSC_FALSE;
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2830 {
2831   Mat            mat;
2832   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2833   PetscErrorCode ierr;
2834 
2835   PetscFunctionBegin;
2836   *newmat = 0;
2837   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2838   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2839   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2840   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2841   a       = (Mat_MPIAIJ*)mat->data;
2842 
2843   mat->factortype   = matin->factortype;
2844   mat->assembled    = PETSC_TRUE;
2845   mat->insertmode   = NOT_SET_VALUES;
2846   mat->preallocated = PETSC_TRUE;
2847 
2848   a->size         = oldmat->size;
2849   a->rank         = oldmat->rank;
2850   a->donotstash   = oldmat->donotstash;
2851   a->roworiented  = oldmat->roworiented;
2852   a->rowindices   = 0;
2853   a->rowvalues    = 0;
2854   a->getrowactive = PETSC_FALSE;
2855 
2856   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2857   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2858 
2859   if (oldmat->colmap) {
2860 #if defined(PETSC_USE_CTABLE)
2861     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2862 #else
2863     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2864     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2865     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2866 #endif
2867   } else a->colmap = 0;
2868   if (oldmat->garray) {
2869     PetscInt len;
2870     len  = oldmat->B->cmap->n;
2871     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2872     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2873     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2874   } else a->garray = 0;
2875 
2876   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2877   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2878   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2879   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2880 
2881   if (oldmat->Mvctx_mpi1) {
2882     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2883     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2884   }
2885 
2886   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2887   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2888   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2889   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2890   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2891   *newmat = mat;
2892   PetscFunctionReturn(0);
2893 }
2894 
2895 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2896 {
2897   PetscBool      isbinary, ishdf5;
2898   PetscErrorCode ierr;
2899 
2900   PetscFunctionBegin;
2901   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2902   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2903   /* force binary viewer to load .info file if it has not yet done so */
2904   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2905   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2906   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2907   if (isbinary) {
2908     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2909   } else if (ishdf5) {
2910 #if defined(PETSC_HAVE_HDF5)
2911     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2912 #else
2913     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2914 #endif
2915   } else {
2916     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2917   }
2918   PetscFunctionReturn(0);
2919 }
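
/*
   Usage sketch for loading a MATMPIAIJ matrix from a binary viewer ("matrix.dat" is a
   placeholder file name):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   MatLoad() dispatches to MatLoad_MPIAIJ() above, which selects the binary or HDF5
   reader according to the viewer type.
*/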
2920 
2921 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2922 {
2923   PetscScalar    *vals,*svals;
2924   MPI_Comm       comm;
2925   PetscErrorCode ierr;
2926   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2927   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2928   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2929   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2930   PetscInt       cend,cstart,n,*rowners;
2931   int            fd;
2932   PetscInt       bs = newMat->rmap->bs;
2933 
2934   PetscFunctionBegin;
2935   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2936   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2937   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2938   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2939   if (!rank) {
2940     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2941     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2942     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2943     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2944   }
2944 
2945   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2946   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2947   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2948   if (bs < 0) bs = 1;
2949 
2950   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2951   M    = header[1]; N = header[2];
2952 
2953   /* If global sizes are set, check if they are consistent with that given in the file */
2954   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2955   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2956 
2957   /* determine ownership of all (block) rows */
2958   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2959   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2960   else m = newMat->rmap->n; /* Set by user */
2961 
2962   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2963   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2964 
2965   /* First process needs enough room for process with most rows */
2966   if (!rank) {
2967     mmax = rowners[1];
2968     for (i=2; i<=size; i++) {
2969       mmax = PetscMax(mmax, rowners[i]);
2970     }
2971   } else mmax = -1;             /* unused, but compilers complain */
2972 
2973   rowners[0] = 0;
2974   for (i=2; i<=size; i++) {
2975     rowners[i] += rowners[i-1];
2976   }
2977   rstart = rowners[rank];
2978   rend   = rowners[rank+1];
2979 
2980   /* distribute row lengths to all processors */
2981   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2982   if (!rank) {
2983     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2984     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2985     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2986     for (j=0; j<m; j++) {
2987       procsnz[0] += ourlens[j];
2988     }
2989     for (i=1; i<size; i++) {
2990       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2991       /* calculate the number of nonzeros on each processor */
2992       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2993         procsnz[i] += rowlengths[j];
2994       }
2995       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2996     }
2997     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2998   } else {
2999     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3000   }
3001 
3002   if (!rank) {
3003     /* determine max buffer needed and allocate it */
3004     maxnz = 0;
3005     for (i=0; i<size; i++) {
3006       maxnz = PetscMax(maxnz,procsnz[i]);
3007     }
3008     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3009 
3010     /* read in my part of the matrix column indices  */
3011     nz   = procsnz[0];
3012     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3013     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3014 
3015     /* read in everyone else's part and ship it off */
3016     for (i=1; i<size; i++) {
3017       nz   = procsnz[i];
3018       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3019       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3020     }
3021     ierr = PetscFree(cols);CHKERRQ(ierr);
3022   } else {
3023     /* determine buffer space needed for message */
3024     nz = 0;
3025     for (i=0; i<m; i++) {
3026       nz += ourlens[i];
3027     }
3028     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3029 
3030     /* receive message of column indices */
3031     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3032   }
3033 
3034   /* determine column ownership if matrix is not square */
3035   if (N != M) {
3036     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3037     else n = newMat->cmap->n;
3038     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3039     cstart = cend - n;
3040   } else {
3041     cstart = rstart;
3042     cend   = rend;
3043     n      = cend - cstart;
3044   }
3045 
3046   /* loop over local rows, determining number of off-diagonal entries */
3047   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3048   jj   = 0;
3049   for (i=0; i<m; i++) {
3050     for (j=0; j<ourlens[i]; j++) {
3051       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3052       jj++;
3053     }
3054   }
3055 
3056   for (i=0; i<m; i++) {
3057     ourlens[i] -= offlens[i];
3058   }
3059   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3060 
3061   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3062 
3063   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3064 
3065   for (i=0; i<m; i++) {
3066     ourlens[i] += offlens[i];
3067   }
3068 
3069   if (!rank) {
3070     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3071 
3072     /* read in my part of the matrix numerical values  */
3073     nz   = procsnz[0];
3074     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3075 
3076     /* insert into matrix */
3077     jj      = rstart;
3078     smycols = mycols;
3079     svals   = vals;
3080     for (i=0; i<m; i++) {
3081       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3082       smycols += ourlens[i];
3083       svals   += ourlens[i];
3084       jj++;
3085     }
3086 
3087     /* read in other processors and ship out */
3088     for (i=1; i<size; i++) {
3089       nz   = procsnz[i];
3090       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3091       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3092     }
3093     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3094   } else {
3095     /* receive numeric values */
3096     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3097 
3098     /* receive message of values */
3099     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3100 
3101     /* insert into matrix */
3102     jj      = rstart;
3103     smycols = mycols;
3104     svals   = vals;
3105     for (i=0; i<m; i++) {
3106       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3107       smycols += ourlens[i];
3108       svals   += ourlens[i];
3109       jj++;
3110     }
3111   }
3112   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3113   ierr = PetscFree(vals);CHKERRQ(ierr);
3114   ierr = PetscFree(mycols);CHKERRQ(ierr);
3115   ierr = PetscFree(rowners);CHKERRQ(ierr);
3116   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3117   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 /* Not scalable because of ISAllGather() unless getting all columns. */
3122 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3123 {
3124   PetscErrorCode ierr;
3125   IS             iscol_local;
3126   PetscBool      isstride;
3127   PetscMPIInt    lisstride=0,gisstride;
3128 
3129   PetscFunctionBegin;
3130   /* check if we are grabbing all columns */
3131   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3132 
3133   if (isstride) {
3134     PetscInt  start,len,mstart,mlen;
3135     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3136     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3137     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3138     if (mstart == start && mlen-mstart == len) lisstride = 1;
3139   }
3140 
3141   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3142   if (gisstride) {
3143     PetscInt N;
3144     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3145     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3146     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3147     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3148   } else {
3149     PetscInt cbs;
3150     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3151     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3152     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3153   }
3154 
3155   *isseq = iscol_local;
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 /*
3160  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3161  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3162 
3163  Input Parameters:
3164    mat - matrix
3165    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3166            i.e., mat->rstart <= isrow[i] < mat->rend
3167    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3168            i.e., mat->cstart <= iscol[i] < mat->cend
3169  Output Parameters:
3170    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3171    iscol_o - sequential column index set for retrieving mat->B
3172    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3173  */
3174 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3175 {
3176   PetscErrorCode ierr;
3177   Vec            x,cmap;
3178   const PetscInt *is_idx;
3179   PetscScalar    *xarray,*cmaparray;
3180   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3181   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3182   Mat            B=a->B;
3183   Vec            lvec=a->lvec,lcmap;
3184   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3185   MPI_Comm       comm;
3186   VecScatter     Mvctx=a->Mvctx;
3187 
3188   PetscFunctionBegin;
3189   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3190   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3191 
3192   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3193   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3194   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3195   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3196   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3197 
3198   /* Get start indices */
3199   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3200   isstart -= ncols;
3201   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3202 
3203   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3204   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3205   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3206   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3207   for (i=0; i<ncols; i++) {
3208     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3209     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3210     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3211   }
3212   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3213   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3214   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3215 
3216   /* Get iscol_d */
3217   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3218   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3219   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3220 
3221   /* Get isrow_d */
3222   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3223   rstart = mat->rmap->rstart;
3224   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3225   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3226   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3227   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3228 
3229   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3230   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3231   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3232 
3233   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3234   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236 
3237   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3238 
3239   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3240   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3241 
3242   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3243   /* off-process column indices */
3244   count = 0;
3245   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3246   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3247 
3248   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3249   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3250   for (i=0; i<Bn; i++) {
3251     if (PetscRealPart(xarray[i]) > -1.0) {
3252       idx[count]     = i;                   /* local column index in off-diagonal part B */
3253       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3254       count++;
3255     }
3256   }
3257   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3258   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3259 
3260   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3261   /* cannot ensure iscol_o has same blocksize as iscol! */
3262 
3263   ierr = PetscFree(idx);CHKERRQ(ierr);
3264   *garray = cmap1;
3265 
3266   ierr = VecDestroy(&x);CHKERRQ(ierr);
3267   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3268   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3269   PetscFunctionReturn(0);
3270 }
3271 
3272 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3273 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3274 {
3275   PetscErrorCode ierr;
3276   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3277   Mat            M = NULL;
3278   MPI_Comm       comm;
3279   IS             iscol_d,isrow_d,iscol_o;
3280   Mat            Asub = NULL,Bsub = NULL;
3281   PetscInt       n;
3282 
3283   PetscFunctionBegin;
3284   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3285 
3286   if (call == MAT_REUSE_MATRIX) {
3287     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3288     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3289     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3290 
3291     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3292     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3293 
3294     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3295     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3296 
3297     /* Update diagonal and off-diagonal portions of submat */
3298     asub = (Mat_MPIAIJ*)(*submat)->data;
3299     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3300     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3301     if (n) {
3302       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3303     }
3304     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3305     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3306 
3307   } else { /* call == MAT_INITIAL_MATRIX */
3308     const PetscInt *garray;
3309     PetscInt        BsubN;
3310 
3311     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3312     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3313 
3314     /* Create local submatrices Asub and Bsub */
3315     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3316     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3317 
3318     /* Create submatrix M */
3319     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3320 
3321     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3322     asub = (Mat_MPIAIJ*)M->data;
3323 
3324     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3325     n = asub->B->cmap->N;
3326     if (BsubN > n) {
3327       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3328       const PetscInt *idx;
3329       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3330       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3331 
3332       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3333       j = 0;
3334       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3335       for (i=0; i<n; i++) {
3336         if (j >= BsubN) break;
3337         while (subgarray[i] > garray[j]) j++;
3338 
3339         if (subgarray[i] == garray[j]) {
3340           idx_new[i] = idx[j++];
3341         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3342       }
3343       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3344 
3345       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3346       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3347 
3348     } else if (BsubN < n) {
3349       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3350     }
3351 
3352     ierr = PetscFree(garray);CHKERRQ(ierr);
3353     *submat = M;
3354 
3355     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3356     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3357     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3358 
3359     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3360     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3361 
3362     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3363     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3364   }
3365   PetscFunctionReturn(0);
3366 }
3367 
3368 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3369 {
3370   PetscErrorCode ierr;
3371   IS             iscol_local=NULL,isrow_d;
3372   PetscInt       csize;
3373   PetscInt       n,i,j,start,end;
3374   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3375   MPI_Comm       comm;
3376 
3377   PetscFunctionBegin;
3378   /* If isrow has same processor distribution as mat,
3379      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3380   if (call == MAT_REUSE_MATRIX) {
3381     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3382     if (isrow_d) {
3383       sameRowDist  = PETSC_TRUE;
3384       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3385     } else {
3386       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3387       if (iscol_local) {
3388         sameRowDist  = PETSC_TRUE;
3389         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3390       }
3391     }
3392   } else {
3393     /* Check if isrow has same processor distribution as mat */
3394     sameDist[0] = PETSC_FALSE;
3395     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3396     if (!n) {
3397       sameDist[0] = PETSC_TRUE;
3398     } else {
3399       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3400       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3401       if (i >= start && j < end) {
3402         sameDist[0] = PETSC_TRUE;
3403       }
3404     }
3405 
3406     /* Check if iscol has same processor distribution as mat */
3407     sameDist[1] = PETSC_FALSE;
3408     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3409     if (!n) {
3410       sameDist[1] = PETSC_TRUE;
3411     } else {
3412       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3413       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3414       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3415     }
3416 
3417     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3418     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3419     sameRowDist = tsameDist[0];
3420   }
3421 
3422   if (sameRowDist) {
3423     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3424       /* isrow and iscol have same processor distribution as mat */
3425       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3426       PetscFunctionReturn(0);
3427     } else { /* sameRowDist */
3428       /* isrow has same processor distribution as mat */
3429       if (call == MAT_INITIAL_MATRIX) {
3430         PetscBool sorted;
3431         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3432         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3433         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3434         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3435 
3436         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3437         if (sorted) {
3438           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3439           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3440           PetscFunctionReturn(0);
3441         }
3442       } else { /* call == MAT_REUSE_MATRIX */
3443         IS    iscol_sub;
3444         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3445         if (iscol_sub) {
3446           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3447           PetscFunctionReturn(0);
3448         }
3449       }
3450     }
3451   }
3452 
3453   /* General case: iscol -> iscol_local which has global size of iscol */
3454   if (call == MAT_REUSE_MATRIX) {
3455     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3456     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3457   } else {
3458     if (!iscol_local) {
3459       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3460     }
3461   }
3462 
3463   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3464   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3465 
3466   if (call == MAT_INITIAL_MATRIX) {
3467     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3468     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3469   }
3470   PetscFunctionReturn(0);
3471 }
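
/*
   Usage sketch (index sets are illustrative; here each process keeps 10 of its own
   rows and columns starting at its row ownership offset rstart):

     IS  isrow,iscol;
     Mat submat;
     ierr = ISCreateStride(PETSC_COMM_WORLD,10,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,10,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
     ...
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&submat);CHKERRQ(ierr);

   When isrow and iscol follow the same process distribution as mat, the routine above
   takes the SameRowColDist/SameRowDist paths and avoids the ISAllGather()-based
   fallback.
*/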
3472 
3473 /*@C
3474      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3475          and "off-diagonal" parts of the matrix in CSR format.
3476 
3477    Collective on MPI_Comm
3478 
3479    Input Parameters:
3480 +  comm - MPI communicator
3481 .  A - "diagonal" portion of matrix
3482 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3483 -  garray - global index of B columns
3484 
3485    Output Parameter:
3486 .   mat - the matrix, with input A as its local diagonal matrix
3487    Level: advanced
3488 
3489    Notes:
3490        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3491        A becomes part of the output mat; B is destroyed by this routine. The user cannot use A or B after this call.
3492 
3493 .seealso: MatCreateMPIAIJWithSplitArrays()
3494 @*/
3495 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3496 {
3497   PetscErrorCode ierr;
3498   Mat_MPIAIJ     *maij;
3499   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3500   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3501   PetscScalar    *oa=b->a;
3502   Mat            Bnew;
3503   PetscInt       m,n,N;
3504 
3505   PetscFunctionBegin;
3506   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3507   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3508   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3509   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3510   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3511   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3512 
3513   /* Get global columns of mat */
3514   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3515 
3516   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3517   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3518   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3519   maij = (Mat_MPIAIJ*)(*mat)->data;
3520 
3521   (*mat)->preallocated = PETSC_TRUE;
3522 
3523   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3524   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3525 
3526   /* Set A as diagonal portion of *mat */
3527   maij->A = A;
3528 
3529   nz = oi[m];
3530   for (i=0; i<nz; i++) {
3531     col   = oj[i];
3532     oj[i] = garray[col];
3533   }
3534 
3535    /* Set Bnew as off-diagonal portion of *mat */
3536   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3537   bnew        = (Mat_SeqAIJ*)Bnew->data;
3538   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3539   maij->B     = Bnew;
3540 
3541   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3542 
3543   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3544   b->free_a       = PETSC_FALSE;
3545   b->free_ij      = PETSC_FALSE;
3546   ierr = MatDestroy(&B);CHKERRQ(ierr);
3547 
3548   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3549   bnew->free_a       = PETSC_TRUE;
3550   bnew->free_ij      = PETSC_TRUE;
3551 
3552   /* condense columns of maij->B */
3553   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3554   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3555   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3556   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3557   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3558   PetscFunctionReturn(0);
3559 }
3560 
3561 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3562 
3563 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3564 {
3565   PetscErrorCode ierr;
3566   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3567   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3568   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3569   Mat            M,Msub,B=a->B;
3570   MatScalar      *aa;
3571   Mat_SeqAIJ     *aij;
3572   PetscInt       *garray = a->garray,*colsub,Ncols;
3573   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3574   IS             iscol_sub,iscmap;
3575   const PetscInt *is_idx,*cmap;
3576   PetscBool      allcolumns=PETSC_FALSE;
3577   MPI_Comm       comm;
3578 
3579   PetscFunctionBegin;
3580   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3581 
3582   if (call == MAT_REUSE_MATRIX) {
3583     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3584     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3585     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3586 
3587     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3588     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3589 
3590     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3591     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3592 
3593     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3594 
3595   } else { /* call == MAT_INITIAL_MATRIX */
3596     PetscBool flg;
3597 
3598     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3599     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3600 
3601     /* (1) iscol -> nonscalable iscol_local */
3602     /* Check for special case: each processor gets entire matrix columns */
3603     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3604     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3605     if (allcolumns) {
3606       iscol_sub = iscol_local;
3607       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3608       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3609 
3610     } else {
3611       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3612       PetscInt *idx,*cmap1,k;
3613       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3614       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3615       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3616       count = 0;
3617       k     = 0;
3618       for (i=0; i<Ncols; i++) {
3619         j = is_idx[i];
3620         if (j >= cstart && j < cend) {
3621           /* diagonal part of mat */
3622           idx[count]     = j;
3623           cmap1[count++] = i; /* column index in submat */
3624         } else if (Bn) {
3625           /* off-diagonal part of mat */
3626           if (j == garray[k]) {
3627             idx[count]     = j;
3628             cmap1[count++] = i;  /* column index in submat */
3629           } else if (j > garray[k]) {
3630             while (j > garray[k] && k < Bn-1) k++;
3631             if (j == garray[k]) {
3632               idx[count]     = j;
3633               cmap1[count++] = i; /* column index in submat */
3634             }
3635           }
3636         }
3637       }
3638       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3639 
3640       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3641       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3642       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3643 
3644       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3645     }
3646 
3647     /* (3) Create sequential Msub */
3648     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3649   }
3650 
3651   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3652   aij  = (Mat_SeqAIJ*)(Msub)->data;
3653   ii   = aij->i;
3654   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3655 
3656   /*
3657       m - number of local rows
3658       Ncols - number of columns (same on all processors)
3659       rstart - first row in new global matrix generated
3660   */
3661   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3662 
3663   if (call == MAT_INITIAL_MATRIX) {
3664     /* (4) Create parallel newmat */
3665     PetscMPIInt    rank,size;
3666     PetscInt       csize;
3667 
3668     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3669     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670 
3671     /*
3672         Determine the number of non-zeros in the diagonal and off-diagonal
3673         portions of the matrix in order to do correct preallocation
3674     */
3675 
3676     /* first get start and end of "diagonal" columns */
3677     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3678     if (csize == PETSC_DECIDE) {
3679       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3680       if (mglobal == Ncols) { /* square matrix */
3681         nlocal = m;
3682       } else {
3683         nlocal = Ncols/size + ((Ncols % size) > rank);
3684       }
3685     } else {
3686       nlocal = csize;
3687     }
3688     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3689     rstart = rend - nlocal;
3690     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3691 
3692     /* next, compute all the lengths */
3693     jj    = aij->j;
3694     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3695     olens = dlens + m;
3696     for (i=0; i<m; i++) {
3697       jend = ii[i+1] - ii[i];
3698       olen = 0;
3699       dlen = 0;
3700       for (j=0; j<jend; j++) {
3701         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3702         else dlen++;
3703         jj++;
3704       }
3705       olens[i] = olen;
3706       dlens[i] = dlen;
3707     }
3708 
3709     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3710     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3711 
3712     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3713     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3714     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3715     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3716     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3717     ierr = PetscFree(dlens);CHKERRQ(ierr);
3718 
3719   } else { /* call == MAT_REUSE_MATRIX */
3720     M    = *newmat;
3721     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3722     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3723     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3724     /*
3725          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3726        rather than the slower MatSetValues().
3727     */
3728     M->was_assembled = PETSC_TRUE;
3729     M->assembled     = PETSC_FALSE;
3730   }
3731 
3732   /* (5) Set values of Msub to *newmat */
3733   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3734   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3735 
3736   jj   = aij->j;
3737   aa   = aij->a;
3738   for (i=0; i<m; i++) {
3739     row = rstart + i;
3740     nz  = ii[i+1] - ii[i];
3741     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3742     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3743     jj += nz; aa += nz;
3744   }
3745   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3746 
3747   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3748   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3749 
3750   ierr = PetscFree(colsub);CHKERRQ(ierr);
3751 
3752   /* save Msub, iscol_sub and iscmap used in processor for next request */
3753   if (call ==  MAT_INITIAL_MATRIX) {
3754     *newmat = M;
3755     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3756     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3757 
3758     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3759     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3760 
3761     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3762     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3763 
3764     if (iscol_local) {
3765       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3766       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3767     }
3768   }
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 /*
3773     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
3774   locally, and then the end result obtained by concatenating the local matrices.
3775   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3776 
3777   Note: This requires a sequential iscol with all indices.
3778 */
3779 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3780 {
3781   PetscErrorCode ierr;
3782   PetscMPIInt    rank,size;
3783   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3784   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3785   Mat            M,Mreuse;
3786   MatScalar      *aa,*vwork;
3787   MPI_Comm       comm;
3788   Mat_SeqAIJ     *aij;
3789   PetscBool      colflag,allcolumns=PETSC_FALSE;
3790 
3791   PetscFunctionBegin;
3792   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3793   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3794   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3795 
3796   /* Check for special case: each processor gets entire matrix columns */
3797   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3798   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3799   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3800 
3801   if (call ==  MAT_REUSE_MATRIX) {
3802     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3803     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3804     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3805   } else {
3806     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3807   }
3808 
3809   /*
3810       m - number of local rows
3811       n - number of columns (same on all processors)
3812       rstart - first row in new global matrix generated
3813   */
3814   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3815   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3816   if (call == MAT_INITIAL_MATRIX) {
3817     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3818     ii  = aij->i;
3819     jj  = aij->j;
3820 
3821     /*
3822         Determine the number of non-zeros in the diagonal and off-diagonal
3823         portions of the matrix in order to do correct preallocation
3824     */
3825 
3826     /* first get start and end of "diagonal" columns */
3827     if (csize == PETSC_DECIDE) {
3828       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3829       if (mglobal == n) { /* square matrix */
3830         nlocal = m;
3831       } else {
3832         nlocal = n/size + ((n % size) > rank);
3833       }
3834     } else {
3835       nlocal = csize;
3836     }
3837     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3838     rstart = rend - nlocal;
3839     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3840 
3841     /* next, compute all the lengths */
3842     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3843     olens = dlens + m;
3844     for (i=0; i<m; i++) {
3845       jend = ii[i+1] - ii[i];
3846       olen = 0;
3847       dlen = 0;
3848       for (j=0; j<jend; j++) {
3849         if (*jj < rstart || *jj >= rend) olen++;
3850         else dlen++;
3851         jj++;
3852       }
3853       olens[i] = olen;
3854       dlens[i] = dlen;
3855     }
3856     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3857     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3858     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3859     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3860     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3861     ierr = PetscFree(dlens);CHKERRQ(ierr);
3862   } else {
3863     PetscInt ml,nl;
3864 
3865     M    = *newmat;
3866     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3867     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3868     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3869     /*
3870          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3871        rather than the slower MatSetValues().
3872     */
3873     M->was_assembled = PETSC_TRUE;
3874     M->assembled     = PETSC_FALSE;
3875   }
3876   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3877   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3878   ii   = aij->i;
3879   jj   = aij->j;
3880   aa   = aij->a;
3881   for (i=0; i<m; i++) {
3882     row   = rstart + i;
3883     nz    = ii[i+1] - ii[i];
3884     cwork = jj;     jj += nz;
3885     vwork = aa;     aa += nz;
3886     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3887   }
3888 
3889   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3890   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   *newmat = M;
3892 
3893   /* save the submatrix used on this process for the next request */
3894   if (call ==  MAT_INITIAL_MATRIX) {
3895     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3896     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3897   }
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3902 {
3903   PetscInt       m,cstart, cend,j,nnz,i,d;
3904   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3905   const PetscInt *JJ;
3906   PetscScalar    *values;
3907   PetscErrorCode ierr;
3908   PetscBool      nooffprocentries;
3909 
3910   PetscFunctionBegin;
3911   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3912 
3913   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3914   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3915   m      = B->rmap->n;
3916   cstart = B->cmap->rstart;
3917   cend   = B->cmap->rend;
3918   rstart = B->rmap->rstart;
3919 
3920   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3921 
3922 #if defined(PETSC_USE_DEBUG)
3923   for (i=0; i<m && Ii; i++) {
3924     nnz = Ii[i+1]- Ii[i];
3925     JJ  = J + Ii[i];
3926     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3927     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3928     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (must be less than %D)",i,JJ[nnz-1],B->cmap->N);
3929   }
3930 #endif
3931 
3932   for (i=0; i<m && Ii; i++) {
3933     nnz     = Ii[i+1]- Ii[i];
3934     JJ      = J + Ii[i];
3935     nnz_max = PetscMax(nnz_max,nnz);
3936     d       = 0;
3937     for (j=0; j<nnz; j++) {
3938       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3939     }
3940     d_nnz[i] = d;
3941     o_nnz[i] = nnz - d;
3942   }
3943   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3944   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3945 
3946   if (v) values = (PetscScalar*)v;
3947   else {
3948     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3949   }
3950 
3951   for (i=0; i<m && Ii; i++) {
3952     ii   = i + rstart;
3953     nnz  = Ii[i+1]- Ii[i];
3954     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3955   }
3956   nooffprocentries    = B->nooffprocentries;
3957   B->nooffprocentries = PETSC_TRUE;
3958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3960   B->nooffprocentries = nooffprocentries;
3961 
3962   if (!v) {
3963     ierr = PetscFree(values);CHKERRQ(ierr);
3964   }
3965   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 /*@
3970    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3971    (the default parallel PETSc format).
3972 
3973    Collective on MPI_Comm
3974 
3975    Input Parameters:
3976 +  B - the matrix
3977 .  i - the indices into j for the start of each local row (starts with zero)
3978 .  j - the column indices for each local row (starts with zero)
3979 -  v - optional values in the matrix
3980 
3981    Level: developer
3982 
3983    Notes:
3984        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3985      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3986      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3987 
3988        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3989 
3990        The format which is used for the sparse matrix input is equivalent to a
3991     row-major ordering, i.e. for the following matrix, the input data expected is
3992     as shown:
3993 
3994 $        1 0 0
3995 $        2 0 3     P0
3996 $       -------
3997 $        4 5 6     P1
3998 $
3999 $     Process0 [P0]: rows_owned=[0,1]
4000 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4001 $        j =  {0,0,2}  [size = 3]
4002 $        v =  {1,2,3}  [size = 3]
4003 $
4004 $     Process1 [P1]: rows_owned=[2]
4005 $        i =  {0,3}    [size = nrow+1  = 1+1]
4006 $        j =  {0,1,2}  [size = 3]
4007 $        v =  {4,5,6}  [size = 3]
4008 
4009 .keywords: matrix, aij, compressed row, sparse, parallel
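       For illustration, a minimal sketch of supplying the P0 data above (assuming a 3x3 matrix over
     two processes, with P0 owning rows 0 and 1; error checking omitted):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
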
4010 
4011 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4012           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4013 @*/
4014 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4015 {
4016   PetscErrorCode ierr;
4017 
4018   PetscFunctionBegin;
4019   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4020   PetscFunctionReturn(0);
4021 }
4022 
4023 /*@C
4024    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4025    (the default parallel PETSc format).  For good matrix assembly performance
4026    the user should preallocate the matrix storage by setting the parameters
4027    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4028    performance can be increased by more than a factor of 50.
4029 
4030    Collective on MPI_Comm
4031 
4032    Input Parameters:
4033 +  B - the matrix
4034 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4035            (same value is used for all local rows)
4036 .  d_nnz - array containing the number of nonzeros in the various rows of the
4037            DIAGONAL portion of the local submatrix (possibly different for each row)
4038            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4039            The size of this array is equal to the number of local rows, i.e 'm'.
4040            For matrices that will be factored, you must leave room for (and set)
4041            the diagonal entry even if it is zero.
4042 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4043            submatrix (same value is used for all local rows).
4044 -  o_nnz - array containing the number of nonzeros in the various rows of the
4045            OFF-DIAGONAL portion of the local submatrix (possibly different for
4046            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4047            structure. The size of this array is equal to the number
4048            of local rows, i.e 'm'.
4049 
4050    If the *_nnz parameter is given then the *_nz parameter is ignored
4051 
4052    The AIJ format (also called the Yale sparse matrix format or
4053    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4054    storage.  The stored row and column indices begin with zero.
4055    See Users-Manual: ch_mat for details.
4056 
4057    The parallel matrix is partitioned such that the first m0 rows belong to
4058    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4059    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4060 
4061    The DIAGONAL portion of the local submatrix of a processor can be defined
4062    as the submatrix which is obtained by extracting the part corresponding to
4063    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4064    first row that belongs to the processor, r2 is the last row belonging to
4065    this processor, and c1-c2 is the range of indices of the local part of a
4066    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4067    common case of a square matrix, the row and column ranges are the same and
4068    the DIAGONAL part is also square. The remaining portion of the local
4069    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4070 
4071    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4072 
4073    You can call MatGetInfo() to get information on how effective the preallocation was;
4074    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4075    You can also run with the option -info and look for messages with the string
4076    malloc in them to see if additional memory allocation was needed.
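
   As a minimal sketch, the preallocation can be checked afterwards with something like:
.vb
     MatInfo info;
     MatGetInfo(B,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve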
4077 
4078    Example usage:
4079 
4080    Consider the following 8x8 matrix with 34 non-zero values that is
4081    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4082    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4083    as follows:
4084 
4085 .vb
4086             1  2  0  |  0  3  0  |  0  4
4087     Proc0   0  5  6  |  7  0  0  |  8  0
4088             9  0 10  | 11  0  0  | 12  0
4089     -------------------------------------
4090            13  0 14  | 15 16 17  |  0  0
4091     Proc1   0 18  0  | 19 20 21  |  0  0
4092             0  0  0  | 22 23  0  | 24  0
4093     -------------------------------------
4094     Proc2  25 26 27  |  0  0 28  | 29  0
4095            30  0  0  | 31 32 33  |  0 34
4096 .ve
4097 
4098    This can be represented as a collection of submatrices as:
4099 
4100 .vb
4101       A B C
4102       D E F
4103       G H I
4104 .ve
4105 
4106    Where the submatrices A,B,C are owned by proc0, D,E,F are
4107    owned by proc1, G,H,I are owned by proc2.
4108 
4109    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4110    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4111    The 'M','N' parameters are 8,8, and have the same values on all procs.
4112 
4113    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4114    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4115    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4116    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4117    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4118    matrix, and [DF] as another SeqAIJ matrix.
4119 
4120    When d_nz, o_nz parameters are specified, d_nz storage elements are
4121    allocated for every row of the local diagonal submatrix, and o_nz
4122    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4123    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4124    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4125    In this case, the values of d_nz,o_nz are:
4126 .vb
4127      proc0 : dnz = 2, o_nz = 2
4128      proc1 : dnz = 3, o_nz = 2
4129      proc2 : dnz = 1, o_nz = 4
4130 .ve
4131    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4132    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4133    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4134    34 values.
4135 
4136    When d_nnz, o_nnz parameters are specified, the storage is specified
4137    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4138    In the above case the values for d_nnz,o_nnz are:
4139 .vb
4140      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4141      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4142      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4143 .ve
4144    Here the space allocated is the sum of all the above values, i.e. 34, and
4145    hence the preallocation is perfect.
4146 
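   As a minimal sketch (assuming this rank is proc0 of the example above and that B has already been
   created with MatCreate(), MatSetSizes() and MatSetType()), the per-row preallocation is requested with:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
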
4147    Level: intermediate
4148 
4149 .keywords: matrix, aij, compressed row, sparse, parallel
4150 
4151 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4152           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4153 @*/
4154 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4155 {
4156   PetscErrorCode ierr;
4157 
4158   PetscFunctionBegin;
4159   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4160   PetscValidType(B,1);
4161   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 /*@
4166      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4167          CSR format.
4168 
4169    Collective on MPI_Comm
4170 
4171    Input Parameters:
4172 +  comm - MPI communicator
4173 .  m - number of local rows (Cannot be PETSC_DECIDE)
4174 .  n - This value should be the same as the local size used in creating the
4175        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4176        calculated if N is given). For square matrices n is almost always m.
4177 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4178 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4179 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4180 .   j - column indices
4181 -   a - matrix values
4182 
4183    Output Parameter:
4184 .   mat - the matrix
4185 
4186    Level: intermediate
4187 
4188    Notes:
4189        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4190      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4191      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4192 
4193        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4194 
4195        The format which is used for the sparse matrix input is equivalent to a
4196     row-major ordering, i.e. for the following matrix, the input data expected is
4197     as shown:
4198 
4199 $        1 0 0
4200 $        2 0 3     P0
4201 $       -------
4202 $        4 5 6     P1
4203 $
4204 $     Process0 [P0]: rows_owned=[0,1]
4205 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4206 $        j =  {0,0,2}  [size = 3]
4207 $        v =  {1,2,3}  [size = 3]
4208 $
4209 $     Process1 [P1]: rows_owned=[2]
4210 $        i =  {0,3}    [size = nrow+1  = 1+1]
4211 $        j =  {0,1,2}  [size = 3]
4212 $        v =  {4,5,6}  [size = 3]
4213 
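     As a minimal sketch (assuming a run on two processes where this rank is P0 of the example above):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
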
4214 .keywords: matrix, aij, compressed row, sparse, parallel
4215 
4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4217           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4218 @*/
4219 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4220 {
4221   PetscErrorCode ierr;
4222 
4223   PetscFunctionBegin;
4224   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4225   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4226   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4227   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4228   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4229   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4230   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4231   PetscFunctionReturn(0);
4232 }
4233 
4234 /*@C
4235    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4236    (the default parallel PETSc format).  For good matrix assembly performance
4237    the user should preallocate the matrix storage by setting the parameters
4238    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4239    performance can be increased by more than a factor of 50.
4240 
4241    Collective on MPI_Comm
4242 
4243    Input Parameters:
4244 +  comm - MPI communicator
4245 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4246            This value should be the same as the local size used in creating the
4247            y vector for the matrix-vector product y = Ax.
4248 .  n - This value should be the same as the local size used in creating the
4249        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4250        calculated if N is given). For square matrices n is almost always m.
4251 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4252 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4253 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4254            (same value is used for all local rows)
4255 .  d_nnz - array containing the number of nonzeros in the various rows of the
4256            DIAGONAL portion of the local submatrix (possibly different for each row)
4257            or NULL, if d_nz is used to specify the nonzero structure.
4258            The size of this array is equal to the number of local rows, i.e 'm'.
4259 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4260            submatrix (same value is used for all local rows).
4261 -  o_nnz - array containing the number of nonzeros in the various rows of the
4262            OFF-DIAGONAL portion of the local submatrix (possibly different for
4263            each row) or NULL, if o_nz is used to specify the nonzero
4264            structure. The size of this array is equal to the number
4265            of local rows, i.e 'm'.
4266 
4267    Output Parameter:
4268 .  A - the matrix
4269 
4270    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4271    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4272    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4273 
4274    Notes:
4275    If the *_nnz parameter is given then the *_nz parameter is ignored
4276 
4277    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4278    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4279    storage requirements for this matrix.
4280 
4281    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4282    processor then it must be used on all processors that share the object for
4283    that argument.
4284 
4285    The user MUST specify either the local or global matrix dimensions
4286    (possibly both).
4287 
4288    The parallel matrix is partitioned across processors such that the
4289    first m0 rows belong to process 0, the next m1 rows belong to
4290    process 1, the next m2 rows belong to process 2, etc., where
4291    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4292    values corresponding to an [m x N] submatrix.
4293 
4294    The columns are logically partitioned with the n0 columns belonging
4295    to 0th partition, the next n1 columns belonging to the next
4296    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4297 
4298    The DIAGONAL portion of the local submatrix on any given processor
4299    is the submatrix corresponding to the rows and columns m,n
4300    assigned to the given processor, i.e. the diagonal submatrix on
4301    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4302    etc. The remaining portion of the local submatrix [m x (N-n)]
4303    constitutes the OFF-DIAGONAL portion. The example below better
4304    illustrates this concept.
4305 
4306    For a square global matrix we define each processor's diagonal portion
4307    to be its local rows and the corresponding columns (a square submatrix);
4308    each processor's off-diagonal portion encompasses the remainder of the
4309    local matrix (a rectangular submatrix).
4310 
4311    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4312 
4313    When calling this routine with a single process communicator, a matrix of
4314    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4315    type of communicator, use the construction mechanism
4316 .vb
4317      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4318 .ve
4319 
4320 $     MatCreate(...,&A);
4321 $     MatSetType(A,MATMPIAIJ);
4322 $     MatSetSizes(A, m,n,M,N);
4323 $     MatMPIAIJSetPreallocation(A,...);
4324 
4325    By default, this format uses inodes (identical nodes) when possible.
4326    We search for consecutive rows with the same nonzero structure, thereby
4327    reusing matrix information to achieve increased efficiency.
4328 
4329    Options Database Keys:
4330 +  -mat_no_inode  - Do not use inodes
4331 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4332 
4333 
4334 
4335    Example usage:
4336 
4337    Consider the following 8x8 matrix with 34 non-zero values that is
4338    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4339    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4340    as follows
4341 
4342 .vb
4343             1  2  0  |  0  3  0  |  0  4
4344     Proc0   0  5  6  |  7  0  0  |  8  0
4345             9  0 10  | 11  0  0  | 12  0
4346     -------------------------------------
4347            13  0 14  | 15 16 17  |  0  0
4348     Proc1   0 18  0  | 19 20 21  |  0  0
4349             0  0  0  | 22 23  0  | 24  0
4350     -------------------------------------
4351     Proc2  25 26 27  |  0  0 28  | 29  0
4352            30  0  0  | 31 32 33  |  0 34
4353 .ve
4354 
4355    This can be represented as a collection of submatrices as
4356 
4357 .vb
4358       A B C
4359       D E F
4360       G H I
4361 .ve
4362 
4363    Where the submatrices A,B,C are owned by proc0, D,E,F are
4364    owned by proc1, G,H,I are owned by proc2.
4365 
4366    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4367    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4368    The 'M','N' parameters are 8,8, and have the same values on all procs.
4369 
4370    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4371    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4372    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4373    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4374    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4375    matrix, and [DF] as another SeqAIJ matrix.
4376 
4377    When d_nz, o_nz parameters are specified, d_nz storage elements are
4378    allocated for every row of the local diagonal submatrix, and o_nz
4379    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4380    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4381    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4382    In this case, the values of d_nz,o_nz are
4383 .vb
4384      proc0 : dnz = 2, o_nz = 2
4385      proc1 : dnz = 3, o_nz = 2
4386      proc2 : dnz = 1, o_nz = 4
4387 .ve
4388    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4389    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4390    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4391    34 values.
4392 
4393    When d_nnz, o_nnz parameters are specified, the storage is specified
4394    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4395    In the above case the values for d_nnz,o_nnz are
4396 .vb
4397      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4398      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4399      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4400 .ve
4401    Here the space allocated is the sum of all the above values, i.e. 34, and
4402    hence the preallocation is perfect.
4403 
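   As a minimal sketch (assuming this rank is proc0 of the example above, so m=3, n=3, M=N=8):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
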
4404    Level: intermediate
4405 
4406 .keywords: matrix, aij, compressed row, sparse, parallel
4407 
4408 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4409           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4410 @*/
4411 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4412 {
4413   PetscErrorCode ierr;
4414   PetscMPIInt    size;
4415 
4416   PetscFunctionBegin;
4417   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4418   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4420   if (size > 1) {
4421     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4422     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4423   } else {
4424     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4425     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4426   }
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4431 {
4432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4433   PetscBool      flg;
4434   PetscErrorCode ierr;
4435 
4436   PetscFunctionBegin;
4437   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4438   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4439   if (Ad)     *Ad     = a->A;
4440   if (Ao)     *Ao     = a->B;
4441   if (colmap) *colmap = a->garray;
4442   PetscFunctionReturn(0);
4443 }
4444 
4445 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4446 {
4447   PetscErrorCode ierr;
4448   PetscInt       m,N,i,rstart,nnz,Ii;
4449   PetscInt       *indx;
4450   PetscScalar    *values;
4451 
4452   PetscFunctionBegin;
4453   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4454   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4455     PetscInt       *dnz,*onz,sum,bs,cbs;
4456 
4457     if (n == PETSC_DECIDE) {
4458       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4459     }
4460     /* Check sum(n) = N */
4461     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4462     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4463 
4464     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4465     rstart -= m;
4466 
4467     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4468     for (i=0; i<m; i++) {
4469       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4470       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4471       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4472     }
4473 
4474     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4475     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4476     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4477     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4478     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4479     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4480     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4481     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4482   }
4483 
4484   /* numeric phase */
4485   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4486   for (i=0; i<m; i++) {
4487     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4488     Ii   = i + rstart;
4489     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4490     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4491   }
4492   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4493   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4498 {
4499   PetscErrorCode    ierr;
4500   PetscMPIInt       rank;
4501   PetscInt          m,N,i,rstart,nnz;
4502   size_t            len;
4503   const PetscInt    *indx;
4504   PetscViewer       out;
4505   char              *name;
4506   Mat               B;
4507   const PetscScalar *values;
4508 
4509   PetscFunctionBegin;
4510   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4511   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4512   /* Should this be the type of the diagonal block of A? */
4513   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4514   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4515   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4516   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4517   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4518   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4519   for (i=0; i<m; i++) {
4520     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4521     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4522     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4523   }
4524   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4525   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4526 
4527   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4528   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4529   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4530   sprintf(name,"%s.%d",outfile,rank);
4531   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4532   ierr = PetscFree(name);CHKERRQ(ierr);
4533   ierr = MatView(B,out);CHKERRQ(ierr);
4534   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4535   ierr = MatDestroy(&B);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4540 {
4541   PetscErrorCode      ierr;
4542   Mat_Merge_SeqsToMPI *merge;
4543   PetscContainer      container;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4547   if (container) {
4548     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4561     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4562     ierr = PetscFree(merge);CHKERRQ(ierr);
4563     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4564   }
4565   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4566   PetscFunctionReturn(0);
4567 }
4568 
4569 #include <../src/mat/utils/freespace.h>
4570 #include <petscbt.h>
4571 
4572 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4573 {
4574   PetscErrorCode      ierr;
4575   MPI_Comm            comm;
4576   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4577   PetscMPIInt         size,rank,taga,*len_s;
4578   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4579   PetscInt            proc,m;
4580   PetscInt            **buf_ri,**buf_rj;
4581   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4582   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4583   MPI_Request         *s_waits,*r_waits;
4584   MPI_Status          *status;
4585   MatScalar           *aa=a->a;
4586   MatScalar           **abuf_r,*ba_i;
4587   Mat_Merge_SeqsToMPI *merge;
4588   PetscContainer      container;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4592   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4593 
4594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4595   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4596 
4597   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4598   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4599 
4600   bi     = merge->bi;
4601   bj     = merge->bj;
4602   buf_ri = merge->buf_ri;
4603   buf_rj = merge->buf_rj;
4604 
4605   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4606   owners = merge->rowmap->range;
4607   len_s  = merge->len_s;
4608 
4609   /* send and recv matrix values */
4610   /*-----------------------------*/
4611   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4612   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4613 
4614   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4615   for (proc=0,k=0; proc<size; proc++) {
4616     if (!len_s[proc]) continue;
4617     i    = owners[proc];
4618     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4619     k++;
4620   }
4621 
4622   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4623   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4624   ierr = PetscFree(status);CHKERRQ(ierr);
4625 
4626   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4627   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4628 
4629   /* insert mat values of mpimat */
4630   /*----------------------------*/
4631   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4632   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4633 
4634   for (k=0; k<merge->nrecv; k++) {
4635     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4636     nrows       = *(buf_ri_k[k]);
4637     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4638     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4639   }
4640 
4641   /* set values of ba */
4642   m = merge->rowmap->n;
4643   for (i=0; i<m; i++) {
4644     arow = owners[rank] + i;
4645     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4646     bnzi = bi[i+1] - bi[i];
4647     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4648 
4649     /* add local non-zero vals of this proc's seqmat into ba */
4650     anzi   = ai[arow+1] - ai[arow];
4651     aj     = a->j + ai[arow];
4652     aa     = a->a + ai[arow];
4653     nextaj = 0;
4654     for (j=0; nextaj<anzi; j++) {
4655       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4656         ba_i[j] += aa[nextaj++];
4657       }
4658     }
4659 
4660     /* add received vals into ba */
4661     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4662       /* i-th row */
4663       if (i == *nextrow[k]) {
4664         anzi   = *(nextai[k]+1) - *nextai[k];
4665         aj     = buf_rj[k] + *(nextai[k]);
4666         aa     = abuf_r[k] + *(nextai[k]);
4667         nextaj = 0;
4668         for (j=0; nextaj<anzi; j++) {
4669           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4670             ba_i[j] += aa[nextaj++];
4671           }
4672         }
4673         nextrow[k]++; nextai[k]++;
4674       }
4675     }
4676     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4677   }
4678   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4679   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4680 
4681   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4682   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4683   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4684   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4685   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4686   PetscFunctionReturn(0);
4687 }
4688 
4689 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4690 {
4691   PetscErrorCode      ierr;
4692   Mat                 B_mpi;
4693   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4694   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4695   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4696   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4697   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4698   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4699   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4700   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4701   MPI_Status          *status;
4702   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4703   PetscBT             lnkbt;
4704   Mat_Merge_SeqsToMPI *merge;
4705   PetscContainer      container;
4706 
4707   PetscFunctionBegin;
4708   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4709 
4710   /* make sure it is a PETSc comm */
4711   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4712   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4714 
4715   ierr = PetscNew(&merge);CHKERRQ(ierr);
4716   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4717 
4718   /* determine row ownership */
4719   /*---------------------------------------------------------*/
4720   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4721   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4722   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4723   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4724   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4725   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4726   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4727 
4728   m      = merge->rowmap->n;
4729   owners = merge->rowmap->range;
4730 
4731   /* determine the number of messages to send, their lengths */
4732   /*---------------------------------------------------------*/
4733   len_s = merge->len_s;
4734 
4735   len          = 0; /* length of buf_si[] */
4736   merge->nsend = 0;
4737   for (proc=0; proc<size; proc++) {
4738     len_si[proc] = 0;
4739     if (proc == rank) {
4740       len_s[proc] = 0;
4741     } else {
4742       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4743       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4744     }
4745     if (len_s[proc]) {
4746       merge->nsend++;
4747       nrows = 0;
4748       for (i=owners[proc]; i<owners[proc+1]; i++) {
4749         if (ai[i+1] > ai[i]) nrows++;
4750       }
4751       len_si[proc] = 2*(nrows+1);
4752       len         += len_si[proc];
4753     }
4754   }
4755 
4756   /* determine the number and length of messages to receive for ij-structure */
4757   /*-------------------------------------------------------------------------*/
4758   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4759   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4760 
4761   /* post the Irecv of j-structure */
4762   /*-------------------------------*/
4763   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4764   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4765 
4766   /* post the Isend of j-structure */
4767   /*--------------------------------*/
4768   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4769 
4770   for (proc=0, k=0; proc<size; proc++) {
4771     if (!len_s[proc]) continue;
4772     i    = owners[proc];
4773     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4774     k++;
4775   }
4776 
4777   /* receives and sends of j-structure are complete */
4778   /*------------------------------------------------*/
4779   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4780   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4781 
4782   /* send and recv i-structure */
4783   /*---------------------------*/
4784   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4785   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4786 
4787   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4788   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4789   for (proc=0,k=0; proc<size; proc++) {
4790     if (!len_s[proc]) continue;
4791     /* form outgoing message for i-structure:
4792          buf_si[0]:                 nrows to be sent
4793                [1:nrows]:           row index (global)
4794                [nrows+1:2*nrows+1]: i-structure index
4795     */
4796     /*-------------------------------------------*/
4797     nrows       = len_si[proc]/2 - 1;
4798     buf_si_i    = buf_si + nrows+1;
4799     buf_si[0]   = nrows;
4800     buf_si_i[0] = 0;
4801     nrows       = 0;
4802     for (i=owners[proc]; i<owners[proc+1]; i++) {
4803       anzi = ai[i+1] - ai[i];
4804       if (anzi) {
4805         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4806         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4807         nrows++;
4808       }
4809     }
4810     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4811     k++;
4812     buf_si += len_si[proc];
4813   }
4814 
4815   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4816   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4817 
4818   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4819   for (i=0; i<merge->nrecv; i++) {
4820     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4821   }
4822 
4823   ierr = PetscFree(len_si);CHKERRQ(ierr);
4824   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4825   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4826   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4827   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4828   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4829   ierr = PetscFree(status);CHKERRQ(ierr);
4830 
4831   /* compute a local seq matrix in each processor */
4832   /*----------------------------------------------*/
4833   /* allocate bi array and free space for accumulating nonzero column info */
4834   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4835   bi[0] = 0;
4836 
4837   /* create and initialize a linked list */
4838   nlnk = N+1;
4839   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4840 
4841   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4842   len  = ai[owners[rank+1]] - ai[owners[rank]];
4843   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4844 
4845   current_space = free_space;
4846 
4847   /* determine symbolic info for each local row */
4848   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4849 
4850   for (k=0; k<merge->nrecv; k++) {
4851     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4852     nrows       = *buf_ri_k[k];
4853     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4854     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4855   }
4856 
4857   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4858   len  = 0;
4859   for (i=0; i<m; i++) {
4860     bnzi = 0;
4861     /* add local non-zero cols of this proc's seqmat into lnk */
4862     arow  = owners[rank] + i;
4863     anzi  = ai[arow+1] - ai[arow];
4864     aj    = a->j + ai[arow];
4865     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866     bnzi += nlnk;
4867     /* add received col data into lnk */
4868     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4869       if (i == *nextrow[k]) { /* i-th row */
4870         anzi  = *(nextai[k]+1) - *nextai[k];
4871         aj    = buf_rj[k] + *nextai[k];
4872         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4873         bnzi += nlnk;
4874         nextrow[k]++; nextai[k]++;
4875       }
4876     }
4877     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4878 
4879     /* if free space is not available, make more free space */
4880     if (current_space->local_remaining<bnzi) {
4881       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4882       nspacedouble++;
4883     }
4884     /* copy data into free space, then initialize lnk */
4885     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4886     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4887 
4888     current_space->array           += bnzi;
4889     current_space->local_used      += bnzi;
4890     current_space->local_remaining -= bnzi;
4891 
4892     bi[i+1] = bi[i] + bnzi;
4893   }
4894 
4895   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4896 
4897   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4898   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4899   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4900 
4901   /* create symbolic parallel matrix B_mpi */
4902   /*---------------------------------------*/
4903   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4904   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4905   if (n==PETSC_DECIDE) {
4906     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4907   } else {
4908     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4909   }
4910   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4911   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4912   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4913   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4914   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4915 
4916   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4917   B_mpi->assembled    = PETSC_FALSE;
4918   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4919   merge->bi           = bi;
4920   merge->bj           = bj;
4921   merge->buf_ri       = buf_ri;
4922   merge->buf_rj       = buf_rj;
4923   merge->coi          = NULL;
4924   merge->coj          = NULL;
4925   merge->owners_co    = NULL;
4926 
4927   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4928 
4929   /* attach the supporting struct to B_mpi for reuse */
4930   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4931   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4932   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4933   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4934   *mpimat = B_mpi;
4935 
4936   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4937   PetscFunctionReturn(0);
4938 }
4939 
4940 /*@C
4941       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4942                  matrices from each processor
4943 
4944     Collective on MPI_Comm
4945 
4946    Input Parameters:
4947 +    comm - the communicator the parallel matrix will live on
4948 .    seqmat - the input sequential matrix, one per process
4949 .    m - number of local rows (or PETSC_DECIDE)
4950 .    n - number of local columns (or PETSC_DECIDE)
4951 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4952 
4953    Output Parameter:
4954 .    mpimat - the parallel matrix generated
4955 
4956     Level: advanced
4957 
4958    Notes:
4959      The dimensions of the sequential matrix in each processor MUST be the same.
4960      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4961      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
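
     A minimal usage sketch (assuming every process holds a sequential matrix seqmat of the same dimensions):
.vb
     Mat C;

     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* ... after the numerical values of seqmat change (same nonzero pattern) ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve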
4962 @*/
4963 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4964 {
4965   PetscErrorCode ierr;
4966   PetscMPIInt    size;
4967 
4968   PetscFunctionBegin;
4969   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4970   if (size == 1) {
4971     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4972     if (scall == MAT_INITIAL_MATRIX) {
4973       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4974     } else {
4975       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4976     }
4977     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4978     PetscFunctionReturn(0);
4979   }
4980   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981   if (scall == MAT_INITIAL_MATRIX) {
4982     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4983   }
4984   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4985   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 /*@
4990      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4991           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4992           with MatGetSize().
4993 
4994     Not Collective
4995 
4996    Input Parameters:
4997 +    A - the matrix
4998 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4999 
5000    Output Parameter:
5001 .    A_loc - the local sequential matrix generated
5002 
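    Notes:
     A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix and that the caller
     destroys A_loc when it is no longer needed):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc; after the values of A change, refresh it with ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
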
5003     Level: developer
5004 
5005 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5006 
5007 @*/
5008 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5009 {
5010   PetscErrorCode ierr;
5011   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5012   Mat_SeqAIJ     *mat,*a,*b;
5013   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5014   MatScalar      *aa,*ba,*cam;
5015   PetscScalar    *ca;
5016   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5017   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5018   PetscBool      match;
5019   MPI_Comm       comm;
5020   PetscMPIInt    size;
5021 
5022   PetscFunctionBegin;
5023   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5024   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5025   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5026   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5027   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5028 
5029   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5030   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5031   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5032   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5033   aa = a->a; ba = b->a;
5034   if (scall == MAT_INITIAL_MATRIX) {
5035     if (size == 1) {
5036       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5037       PetscFunctionReturn(0);
5038     }
5039 
5040     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5041     ci[0] = 0;
5042     for (i=0; i<am; i++) {
5043       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5044     }
5045     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5046     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5047     k    = 0;
5048     for (i=0; i<am; i++) {
5049       ncols_o = bi[i+1] - bi[i];
5050       ncols_d = ai[i+1] - ai[i];
5051       /* off-diagonal portion of A: columns with global index below the diagonal block */
5052       for (jo=0; jo<ncols_o; jo++) {
5053         col = cmap[*bj];
5054         if (col >= cstart) break;
5055         cj[k]   = col; bj++;
5056         ca[k++] = *ba++;
5057       }
5058       /* diagonal portion of A */
5059       for (j=0; j<ncols_d; j++) {
5060         cj[k]   = cstart + *aj++;
5061         ca[k++] = *aa++;
5062       }
5063       /* off-diagonal portion of A: columns with global index above the diagonal block */
5064       for (j=jo; j<ncols_o; j++) {
5065         cj[k]   = cmap[*bj++];
5066         ca[k++] = *ba++;
5067       }
5068     }
5069     /* put together the new matrix */
5070     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5071     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5072     /* Since these are PETSc arrays, change flags to free them as necessary. */
5073     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5074     mat->free_a  = PETSC_TRUE;
5075     mat->free_ij = PETSC_TRUE;
5076     mat->nonew   = 0;
5077   } else if (scall == MAT_REUSE_MATRIX) {
5078     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5079     ci = mat->i; cj = mat->j; cam = mat->a;
5080     for (i=0; i<am; i++) {
5081       /* off-diagonal portion of A: columns with global index below the diagonal block */
5082       ncols_o = bi[i+1] - bi[i];
5083       for (jo=0; jo<ncols_o; jo++) {
5084         col = cmap[*bj];
5085         if (col >= cstart) break;
5086         *cam++ = *ba++; bj++;
5087       }
5088       /* diagonal portion of A */
5089       ncols_d = ai[i+1] - ai[i];
5090       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5091       /* off-diagonal portion of A: columns with global index above the diagonal block */
5092       for (j=jo; j<ncols_o; j++) {
5093         *cam++ = *ba++; bj++;
5094       }
5095     }
5096   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5097   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5098   PetscFunctionReturn(0);
5099 }
5100 
5101 /*@C
5102      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and NON-ZERO columns
5103 
5104     Not Collective
5105 
5106    Input Parameters:
5107 +    A - the matrix
5108 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5109 -    row, col - index sets of rows and columns to extract (or NULL)
5110 
5111    Output Parameter:
5112 .    A_loc - the local sequential matrix generated
5113 
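   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and NULL is
   passed for row and col so that all local rows and all nonzero columns are selected):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
   /* ... use A_loc ... */
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
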
5114     Level: developer
5115 
5116 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5117 
5118 @*/
5119 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5120 {
5121   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5122   PetscErrorCode ierr;
5123   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5124   IS             isrowa,iscola;
5125   Mat            *aloc;
5126   PetscBool      match;
5127 
5128   PetscFunctionBegin;
5129   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5130   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5131   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5132   if (!row) {
5133     start = A->rmap->rstart; end = A->rmap->rend;
5134     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5135   } else {
5136     isrowa = *row;
5137   }
5138   if (!col) {
5139     start = A->cmap->rstart;
5140     cmap  = a->garray;
5141     nzA   = a->A->cmap->n;
5142     nzB   = a->B->cmap->n;
5143     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5144     ncols = 0;
5145     for (i=0; i<nzB; i++) {
5146       if (cmap[i] < start) idx[ncols++] = cmap[i];
5147       else break;
5148     }
5149     imark = i;
5150     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5151     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5152     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5153   } else {
5154     iscola = *col;
5155   }
5156   if (scall != MAT_INITIAL_MATRIX) {
5157     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5158     aloc[0] = *A_loc;
5159   }
5160   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5161   if (!col) { /* attach global id of condensed columns */
5162     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5163   }
5164   *A_loc = aloc[0];
5165   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5166   if (!row) {
5167     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5168   }
5169   if (!col) {
5170     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5171   }
5172   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5173   PetscFunctionReturn(0);
5174 }
5175 
5176 /*@C
5177     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5178 
5179     Collective on Mat
5180 
5181    Input Parameters:
5182 +    A,B - the matrices in mpiaij format
5183 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5184 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5185 
5186    Output Parameters:
5187 +    rowb, colb - index sets of rows and columns of B to extract
5188 -    B_seq - the sequential matrix generated
5189 
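   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices with
   compatible layouts; the index sets and B_seq created on the first call are reused afterwards):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   /* ... the numerical values of B change, the nonzero pattern does not ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
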
5190     Level: developer
5191 
5192 @*/
5193 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5194 {
5195   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5196   PetscErrorCode ierr;
5197   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5198   IS             isrowb,iscolb;
5199   Mat            *bseq=NULL;
5200 
5201   PetscFunctionBegin;
5202   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5203     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5204   }
5205   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5206 
5207   if (scall == MAT_INITIAL_MATRIX) {
5208     start = A->cmap->rstart;
5209     cmap  = a->garray;
5210     nzA   = a->A->cmap->n;
5211     nzB   = a->B->cmap->n;
5212     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5213     ncols = 0;
5214     for (i=0; i<nzB; i++) {  /* row < local row index */
5215       if (cmap[i] < start) idx[ncols++] = cmap[i];
5216       else break;
5217     }
5218     imark = i;
5219     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5220     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5221     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5222     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5223   } else {
5224     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5225     isrowb  = *rowb; iscolb = *colb;
5226     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5227     bseq[0] = *B_seq;
5228   }
5229   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5230   *B_seq = bseq[0];
5231   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5232   if (!rowb) {
5233     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5234   } else {
5235     *rowb = isrowb;
5236   }
5237   if (!colb) {
5238     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5239   } else {
5240     *colb = iscolb;
5241   }
5242   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5243   PetscFunctionReturn(0);
5244 }
5245 
5246 /*
5247     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5248     of the OFF-DIAGONAL portion of the local part of A
5249 
5250     Collective on Mat
5251 
5252    Input Parameters:
5253 +    A,B - the matrices in mpiaij format
5254 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5255 
5256    Output Parameters:
5257 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5258 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5259 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5260 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5261 
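    A typical calling sequence (a sketch of how the matrix-product kernels use this routine; A and B
    are assumed to be assembled MATMPIAIJ matrices with compatible layouts, and the buffers returned
    with MAT_INITIAL_MATRIX are passed back unchanged for MAT_REUSE_MATRIX):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... the numerical values of B change, the nonzero pattern does not ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
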
5262     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5263      for this matrix. This is not desirable.
5264 
5265     Level: developer
5266 
5267 */
5268 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5269 {
5270   PetscErrorCode         ierr;
5271   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5272   Mat_SeqAIJ             *b_oth;
5273   VecScatter             ctx;
5274   MPI_Comm               comm;
5275   const PetscMPIInt      *rprocs,*sprocs;
5276   const PetscInt         *srow,*rstarts,*sstarts;
5277   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5278   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5279   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5280   MPI_Request            *rwaits = NULL,*swaits = NULL;
5281   MPI_Status             rstatus;
5282   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5283 
5284   PetscFunctionBegin;
5285   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5286   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5287 
5288   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5289     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5290   }
5291   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5292   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5293 
5294   if (size == 1) {
5295     startsj_s = NULL;
5296     bufa_ptr  = NULL;
5297     *B_oth    = NULL;
5298     PetscFunctionReturn(0);
5299   }
5300 
5301   ctx = a->Mvctx;
5302   tag = ((PetscObject)ctx)->tag;
5303 
5304   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5305   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5306   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5307   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5308   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5309   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5310   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5311 
5312   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5313   if (scall == MAT_INITIAL_MATRIX) {
5314     /* i-array */
5315     /*---------*/
5316     /*  post receives */
5317     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5318     for (i=0; i<nrecvs; i++) {
5319       rowlen = rvalues + rstarts[i]*rbs;
5320       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5321       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5322     }
5323 
5324     /* pack the outgoing message */
5325     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5326 
5327     sstartsj[0] = 0;
5328     rstartsj[0] = 0;
5329     len         = 0; /* total length of j or a array to be sent */
5330     if (nsends) {
5331       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5332       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5333     }
5334     for (i=0; i<nsends; i++) {
5335       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5336       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5337       for (j=0; j<nrows; j++) {
5338         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5339         for (l=0; l<sbs; l++) {
5340           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5341 
5342           rowlen[j*sbs+l] = ncols;
5343 
5344           len += ncols;
5345           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5346         }
5347         k++;
5348       }
5349       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5350 
5351       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5352     }
5353     /* recvs and sends of i-array are completed */
5354     i = nrecvs;
5355     while (i--) {
5356       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5357     }
5358     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5359     ierr = PetscFree(svalues);CHKERRQ(ierr);
5360 
5361     /* allocate buffers for sending j and a arrays */
5362     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5363     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5364 
5365     /* create i-array of B_oth */
5366     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5367 
5368     b_othi[0] = 0;
5369     len       = 0; /* total length of j or a array to be received */
5370     k         = 0;
5371     for (i=0; i<nrecvs; i++) {
5372       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5373       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5374       for (j=0; j<nrows; j++) {
5375         b_othi[k+1] = b_othi[k] + rowlen[j];
5376         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5377         k++;
5378       }
5379       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5380     }
5381     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5382 
5383     /* allocate space for j and a arrays of B_oth */
5384     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5385     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5386 
5387     /* j-array */
5388     /*---------*/
5389     /*  post receives of j-array */
5390     for (i=0; i<nrecvs; i++) {
5391       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5392       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5393     }
5394 
5395     /* pack the outgoing message j-array */
5396     if (nsends) k = sstarts[0];
5397     for (i=0; i<nsends; i++) {
5398       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5399       bufJ  = bufj+sstartsj[i];
5400       for (j=0; j<nrows; j++) {
5401         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5402         for (ll=0; ll<sbs; ll++) {
5403           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5404           for (l=0; l<ncols; l++) {
5405             *bufJ++ = cols[l];
5406           }
5407           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5408         }
5409       }
5410       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5411     }
5412 
5413     /* recvs and sends of j-array are completed */
5414     i = nrecvs;
5415     while (i--) {
5416       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5417     }
5418     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5419   } else if (scall == MAT_REUSE_MATRIX) {
5420     sstartsj = *startsj_s;
5421     rstartsj = *startsj_r;
5422     bufa     = *bufa_ptr;
5423     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5424     b_otha   = b_oth->a;
5425   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5426 
5427   /* a-array */
5428   /*---------*/
5429   /*  post receives of a-array */
5430   for (i=0; i<nrecvs; i++) {
5431     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5432     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5433   }
5434 
5435   /* pack the outgoing message a-array */
5436   if (nsends) k = sstarts[0];
5437   for (i=0; i<nsends; i++) {
5438     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5439     bufA  = bufa+sstartsj[i];
5440     for (j=0; j<nrows; j++) {
5441       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5442       for (ll=0; ll<sbs; ll++) {
5443         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5444         for (l=0; l<ncols; l++) {
5445           *bufA++ = vals[l];
5446         }
5447         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5448       }
5449     }
5450     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5451   }
5452   /* recvs and sends of a-array are completed */
5453   i = nrecvs;
5454   while (i--) {
5455     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5456   }
5457   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5458   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5459 
5460   if (scall == MAT_INITIAL_MATRIX) {
5461     /* put together the new matrix */
5462     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5463 
5464     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5465     /* Since these are PETSc arrays, change flags to free them as necessary. */
5466     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5467     b_oth->free_a  = PETSC_TRUE;
5468     b_oth->free_ij = PETSC_TRUE;
5469     b_oth->nonew   = 0;
5470 
5471     ierr = PetscFree(bufj);CHKERRQ(ierr);
5472     if (!startsj_s || !bufa_ptr) {
5473       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5474       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5475     } else {
5476       *startsj_s = sstartsj;
5477       *startsj_r = rstartsj;
5478       *bufa_ptr  = bufa;
5479     }
5480   }
5481 
5482   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5483   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5484   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5485   PetscFunctionReturn(0);
5486 }
5487 
5488 /*@C
5489   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5490 
5491   Not Collective
5492 
5493   Input Parameter:
5494 . A - The matrix in mpiaij format
5495 
5496   Output Parameters:
5497 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5498 . colmap - A map from global column index to local index into lvec
5499 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5500 
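  Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; the type of
  colmap depends on whether PETSc was configured with ctable support):
.vb
  Vec        lvec;
  VecScatter sc;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sc);CHKERRQ(ierr);
.ve
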
5501   Level: developer
5502 
5503 @*/
5504 #if defined(PETSC_USE_CTABLE)
5505 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5506 #else
5507 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5508 #endif
5509 {
5510   Mat_MPIAIJ *a;
5511 
5512   PetscFunctionBegin;
5513   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5514   PetscValidPointer(lvec, 2);
5515   PetscValidPointer(colmap, 3);
5516   PetscValidPointer(multScatter, 4);
5517   a = (Mat_MPIAIJ*) A->data;
5518   if (lvec) *lvec = a->lvec;
5519   if (colmap) *colmap = a->colmap;
5520   if (multScatter) *multScatter = a->Mvctx;
5521   PetscFunctionReturn(0);
5522 }
5523 
5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5526 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5527 #if defined(PETSC_HAVE_MKL_SPARSE)
5528 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5529 #endif
5530 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5531 #if defined(PETSC_HAVE_ELEMENTAL)
5532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5533 #endif
5534 #if defined(PETSC_HAVE_HYPRE)
5535 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5536 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5537 #endif
5538 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5540 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5541 
5542 /*
5543     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product directly is untenable
5544 
5545                n                       p                          p
5546         (              )       (              )         (                  )
5547       m (      A       )  *  n (       B      )   =   m (         C        )
5548         (              )       (              )         (                  )
5549 
5550 */
5551 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5552 {
5553   PetscErrorCode ierr;
5554   Mat            At,Bt,Ct;
5555 
5556   PetscFunctionBegin;
5557   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5558   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5559   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5560   ierr = MatDestroy(&At);CHKERRQ(ierr);
5561   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5562   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5563   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5564   PetscFunctionReturn(0);
5565 }
5566 
5567 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5568 {
5569   PetscErrorCode ierr;
5570   PetscInt       m=A->rmap->n,n=B->cmap->n;
5571   Mat            Cmat;
5572 
5573   PetscFunctionBegin;
5574   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5575   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5576   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5577   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5578   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5579   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5580   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5581   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5582 
5583   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5584 
5585   *C = Cmat;
5586   PetscFunctionReturn(0);
5587 }
5588 
5589 /* ----------------------------------------------------------------*/
5590 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5591 {
5592   PetscErrorCode ierr;
5593 
5594   PetscFunctionBegin;
5595   if (scall == MAT_INITIAL_MATRIX) {
5596     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5597     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5598     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5599   }
5600   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5601   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5602   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5603   PetscFunctionReturn(0);
5604 }
5605 
5606 /*MC
5607    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5608 
5609    Options Database Keys:
5610 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5611 
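  Example usage (a minimal sketch; M and N stand for the desired global sizes and the preallocation
  numbers are placeholders):
.vb
  Mat A;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
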
5612   Level: beginner
5613 
5614 .seealso: MatCreateAIJ()
5615 M*/
5616 
5617 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5618 {
5619   Mat_MPIAIJ     *b;
5620   PetscErrorCode ierr;
5621   PetscMPIInt    size;
5622 
5623   PetscFunctionBegin;
5624   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5625 
5626   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5627   B->data       = (void*)b;
5628   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5629   B->assembled  = PETSC_FALSE;
5630   B->insertmode = NOT_SET_VALUES;
5631   b->size       = size;
5632 
5633   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5634 
5635   /* build cache for off array entries formed */
5636   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5637 
5638   b->donotstash  = PETSC_FALSE;
5639   b->colmap      = 0;
5640   b->garray      = 0;
5641   b->roworiented = PETSC_TRUE;
5642 
5643   /* stuff used for matrix vector multiply */
5644   b->lvec  = NULL;
5645   b->Mvctx = NULL;
5646 
5647   /* stuff for MatGetRow() */
5648   b->rowindices   = 0;
5649   b->rowvalues    = 0;
5650   b->getrowactive = PETSC_FALSE;
5651 
5652   /* flexible pointer used in CUSP/CUSPARSE classes */
5653   b->spptr = NULL;
5654 
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5665 #if defined(PETSC_HAVE_MKL_SPARSE)
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5667 #endif
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5670 #if defined(PETSC_HAVE_ELEMENTAL)
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5672 #endif
5673 #if defined(PETSC_HAVE_HYPRE)
5674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5675 #endif
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5681 #if defined(PETSC_HAVE_HYPRE)
5682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5683 #endif
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5686   PetscFunctionReturn(0);
5687 }
5688 
5689 /*@C
5690      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5691          and "off-diagonal" part of the matrix in CSR format.
5692 
5693    Collective on MPI_Comm
5694 
5695    Input Parameters:
5696 +  comm - MPI communicator
5697 .  m - number of local rows (Cannot be PETSC_DECIDE)
5698 .  n - This value should be the same as the local size used in creating the
5699        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5700        it calculated if N is given). For square matrices n is almost always m.
5701 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5702 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5703 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5704 .   j - column indices
5705 .   a - matrix values
5706 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5707 .   oj - column indices
5708 -   oa - matrix values
5709 
5710    Output Parameter:
5711 .   mat - the matrix
5712 
5713    Level: advanced
5714 
5715    Notes:
5716        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5717        must free the arrays once the matrix has been destroyed and not before.
5718 
5719        The i and j indices are 0 based
5720 
5721        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5722 
5723        This sets local rows and cannot be used to set off-processor values.
5724 
5725        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5726        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5727        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5728        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5729        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5730        communication if it is known that only local entries will be set.
5731 
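       Example (a minimal sketch; every process contributes a 2x2 identity block, so the
       "off-diagonal" part is empty; the arrays must stay alive until the matrix is destroyed,
       which the static storage below guarantees):
.vb
       static PetscInt    i[]  = {0,1,2},   j[]  = {0,1};
       static PetscScalar a[]  = {1.0,1.0};
       static PetscInt    oi[] = {0,0,0},   oj[] = {0};  /* no off-diagonal entries; oj/oa are unused dummies */
       static PetscScalar oa[] = {0.0};
       Mat                mat;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
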
5732 .keywords: matrix, aij, compressed row, sparse, parallel
5733 
5734 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5735           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5736 @*/
5737 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5738 {
5739   PetscErrorCode ierr;
5740   Mat_MPIAIJ     *maij;
5741 
5742   PetscFunctionBegin;
5743   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5744   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5745   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5746   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5747   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5748   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5749   maij = (Mat_MPIAIJ*) (*mat)->data;
5750 
5751   (*mat)->preallocated = PETSC_TRUE;
5752 
5753   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5754   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5755 
5756   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5757   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5758 
5759   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5760   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5761   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5762   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5763 
5764   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5765   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5766   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5767   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5768   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5769   PetscFunctionReturn(0);
5770 }
5771 
5772 /*
5773     Special version for direct calls from Fortran
5774 */
5775 #include <petsc/private/fortranimpl.h>
5776 
5777 /* Change these macros so they can be used in a void function */
5778 #undef CHKERRQ
5779 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5780 #undef SETERRQ2
5781 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5782 #undef SETERRQ3
5783 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5784 #undef SETERRQ
5785 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5786 
5787 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5788 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5789 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5790 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5791 #else
5792 #endif
5793 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5794 {
5795   Mat            mat  = *mmat;
5796   PetscInt       m    = *mm, n = *mn;
5797   InsertMode     addv = *maddv;
5798   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5799   PetscScalar    value;
5800   PetscErrorCode ierr;
5801 
5802   MatCheckPreallocated(mat,1);
5803   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5804 
5805 #if defined(PETSC_USE_DEBUG)
5806   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5807 #endif
5808   {
5809     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5810     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5811     PetscBool roworiented = aij->roworiented;
5812 
5813     /* Some Variables required in the macro */
5814     Mat        A                 = aij->A;
5815     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5816     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5817     MatScalar  *aa               = a->a;
5818     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5819     Mat        B                 = aij->B;
5820     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5821     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5822     MatScalar  *ba               = b->a;
5823 
5824     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5825     PetscInt  nonew = a->nonew;
5826     MatScalar *ap1,*ap2;
5827 
5828     PetscFunctionBegin;
5829     for (i=0; i<m; i++) {
5830       if (im[i] < 0) continue;
5831 #if defined(PETSC_USE_DEBUG)
5832       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5833 #endif
5834       if (im[i] >= rstart && im[i] < rend) {
5835         row      = im[i] - rstart;
5836         lastcol1 = -1;
5837         rp1      = aj + ai[row];
5838         ap1      = aa + ai[row];
5839         rmax1    = aimax[row];
5840         nrow1    = ailen[row];
5841         low1     = 0;
5842         high1    = nrow1;
5843         lastcol2 = -1;
5844         rp2      = bj + bi[row];
5845         ap2      = ba + bi[row];
5846         rmax2    = bimax[row];
5847         nrow2    = bilen[row];
5848         low2     = 0;
5849         high2    = nrow2;
5850 
5851         for (j=0; j<n; j++) {
5852           if (roworiented) value = v[i*n+j];
5853           else value = v[i+j*m];
5854           if (in[j] >= cstart && in[j] < cend) {
5855             col = in[j] - cstart;
5856             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5857             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5858           } else if (in[j] < 0) continue;
5859 #if defined(PETSC_USE_DEBUG)
5860           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5861           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5862 #endif
5863           else {
5864             if (mat->was_assembled) {
5865               if (!aij->colmap) {
5866                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5867               }
5868 #if defined(PETSC_USE_CTABLE)
5869               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5870               col--;
5871 #else
5872               col = aij->colmap[in[j]] - 1;
5873 #endif
5874               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5875               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5876                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5877                 col  =  in[j];
5878                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5879                 B     = aij->B;
5880                 b     = (Mat_SeqAIJ*)B->data;
5881                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5882                 rp2   = bj + bi[row];
5883                 ap2   = ba + bi[row];
5884                 rmax2 = bimax[row];
5885                 nrow2 = bilen[row];
5886                 low2  = 0;
5887                 high2 = nrow2;
5888                 bm    = aij->B->rmap->n;
5889                 ba    = b->a;
5890               }
5891             } else col = in[j];
5892             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5893           }
5894         }
5895       } else if (!aij->donotstash) {
5896         if (roworiented) {
5897           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5898         } else {
5899           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5900         }
5901       }
5902     }
5903   }
5904   PetscFunctionReturnVoid();
5905 }
5906