xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 94a2d30dd2523562d476bcf92571b7b07eeb5a66)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to using inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
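
/*
   Illustrative usage sketch (added commentary, not part of the original source): creating an AIJ
   matrix and calling both preallocation routines as recommended above, so the same code works on
   one or many MPI processes.  The global size n and the per-row nonzero estimates (5 diagonal,
   2 off-diagonal) are assumptions made up for this example.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         (used on a one-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  (used on a multi-process communicator)
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/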
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
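
/*
   Illustrative usage sketch (added commentary, not part of the original source): selecting the
   AIJCRL format at runtime with the -mat_type aijcrl option listed above.  The preallocation
   numbers are assumptions made up for this example.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                        (honors -mat_type aijcrl)
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/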
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
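
/*
   Illustrative usage sketch (added commentary, not part of the original source): how a caller
   might use MatDistribute_MPIAIJ() based on the comment preceding it.  Here gmat is the square
   MATSEQAIJ matrix to be distributed and m is the number of rows this process should own; both
   names are assumptions made up for this example.

     Mat A;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);   (moves over new numerical values only)
*/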
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 has an order N integer array), but access is fast.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
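
/*
   Illustrative sketch (added commentary, not part of the original source): how the colmap built
   above is consulted later in this file (e.g. in MatSetValues_MPIAIJ()) to translate a global
   column index gcol into a local column of the off-diagonal block; a negative result means the
   column is not present locally.  gcol is a placeholder name for this example.

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/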
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
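
/*
   Illustrative data sketch (added commentary, not part of the original source): the local CSR
   input expected by the two CopyFromCSRFormat routines above, for a process that owns columns
   [cstart,cend) = [4,8) and has two local rows.  All numbers are made up for this example.

     row 0: global columns 4, 7, 9        row 1: global columns 2, 5

     mat_i = {0, 3, 5}
     mat_j = {4, 7, 9,  2, 5}
     mat_a = {a, b, c,  d, e}

   Columns 4, 7 and 5 fall inside [cstart,cend) and land in the diagonal block (shifted by -cstart
   to local columns 0, 3 and 1); columns 9 and 2 go to the off-diagonal block with their global indices.
*/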
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_DEVICE)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so, we must
857      also disassemble ourselves, in order that we may reassemble. */
858   /*
859      if the nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled, thus we can skip this step
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_DEVICE)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_DEVICE)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = NULL;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in the matrix then only set the matrix nonzero state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_DEVICE)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layouts don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_CUDA)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_ELEMENTAL)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1302 #endif
1303 #if defined(PETSC_HAVE_SCALAPACK)
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1305 #endif
1306 #if defined(PETSC_HAVE_HYPRE)
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1309 #endif
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1316 #if defined(PETSC_HAVE_MKL_SPARSE)
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1318 #endif
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1320   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1321   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1326 {
1327   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1328   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1329   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1330   const PetscInt    *garray = aij->garray;
1331   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1332   PetscInt          *rowlens;
1333   PetscInt          *colidxs;
1334   PetscScalar       *matvals;
1335   PetscErrorCode    ierr;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1339 
1340   M  = mat->rmap->N;
1341   N  = mat->cmap->N;
1342   m  = mat->rmap->n;
1343   rs = mat->rmap->rstart;
1344   cs = mat->cmap->rstart;
1345   nz = A->nz + B->nz;
1346 
1347   /* write matrix header */
1348   header[0] = MAT_FILE_CLASSID;
1349   header[1] = M; header[2] = N; header[3] = nz;
1350   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1352 
1353   /* fill in and store row lengths  */
1354   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1355   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1356   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1357   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1358 
1359   /* fill in and store column indices */
1360   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1361   for (cnt=0, i=0; i<m; i++) {
1362     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1363       if (garray[B->j[jb]] > cs) break;
1364       colidxs[cnt++] = garray[B->j[jb]];
1365     }
1366     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1367       colidxs[cnt++] = A->j[ja] + cs;
1368     for (; jb<B->i[i+1]; jb++)
1369       colidxs[cnt++] = garray[B->j[jb]];
1370   }
1371   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1372   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1373   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1374 
1375   /* fill in and store nonzero values */
1376   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1377   for (cnt=0, i=0; i<m; i++) {
1378     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1379       if (garray[B->j[jb]] > cs) break;
1380       matvals[cnt++] = B->a[jb];
1381     }
1382     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1383       matvals[cnt++] = A->a[ja];
1384     for (; jb<B->i[i+1]; jb++)
1385       matvals[cnt++] = B->a[jb];
1386   }
1387   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1388   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1389   ierr = PetscFree(matvals);CHKERRQ(ierr);
1390 
1391   /* write block size option to the viewer's .info file */
1392   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
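/*
   Editor's note (a hedged usage sketch, not part of this source file): the binary writer above is
   normally reached by viewing the matrix with a binary viewer; the file name is a placeholder.

      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The stored layout follows the header written above (class id, M, N, global nz), then the row
   lengths, the global column indices, and the values, each gathered with PetscViewerBinaryWriteAll().
*/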
1396 #include <petscdraw.h>
1397 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1398 {
1399   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1400   PetscErrorCode    ierr;
1401   PetscMPIInt       rank = aij->rank,size = aij->size;
1402   PetscBool         isdraw,iascii,isbinary;
1403   PetscViewer       sviewer;
1404   PetscViewerFormat format;
1405 
1406   PetscFunctionBegin;
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1410   if (iascii) {
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1413       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1414       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1415       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1416       for (i=0; i<(PetscInt)size; i++) {
1417         nmax = PetscMax(nmax,nz[i]);
1418         nmin = PetscMin(nmin,nz[i]);
1419         navg += nz[i];
1420       }
1421       ierr = PetscFree(nz);CHKERRQ(ierr);
1422       navg = navg/size;
1423       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     }
1426     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1427     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1428       MatInfo   info;
1429       PetscBool inodes;
1430 
1431       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1432       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1433       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1434       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1435       if (!inodes) {
1436         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1437                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1438       } else {
1439         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1440                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1441       }
1442       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1443       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1444       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1445       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1446       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1447       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1449       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1450       PetscFunctionReturn(0);
1451     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1452       PetscInt inodecount,inodelimit,*inodes;
1453       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1454       if (inodes) {
1455         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1456       } else {
1457         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1458       }
1459       PetscFunctionReturn(0);
1460     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1461       PetscFunctionReturn(0);
1462     }
1463   } else if (isbinary) {
1464     if (size == 1) {
1465       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1466       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1467     } else {
1468       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1469     }
1470     PetscFunctionReturn(0);
1471   } else if (iascii && size == 1) {
1472     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1473     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1474     PetscFunctionReturn(0);
1475   } else if (isdraw) {
1476     PetscDraw draw;
1477     PetscBool isnull;
1478     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1479     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1480     if (isnull) PetscFunctionReturn(0);
1481   }
1482 
1483   { /* assemble the entire matrix onto first processor */
1484     Mat A = NULL, Av;
1485     IS  isrow,iscol;
1486 
1487     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1488     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1489     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1490     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1491 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1492 /*
1493     Mat *AA, A = NULL, Av;
1494     IS  isrow,iscol;
1495 
1496     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1497     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1498     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1499     if (!rank) {
1500        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1501        A    = AA[0];
1502        Av   = AA[0];
1503     }
1504     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1505 */
1506     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1507     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1508     /*
1509        Every process has to participate in drawing the matrix, since the graphics waits are
1510        synchronized across all processes that share the PetscDraw object
1511     */
1512     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1513     if (!rank) {
1514       if (((PetscObject)mat)->name) {
1515         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1516       }
1517       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1518     }
1519     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1520     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1521     ierr = MatDestroy(&A);CHKERRQ(ierr);
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1527 {
1528   PetscErrorCode ierr;
1529   PetscBool      iascii,isdraw,issocket,isbinary;
1530 
1531   PetscFunctionBegin;
1532   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1533   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1534   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1535   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1536   if (iascii || isdraw || isbinary || issocket) {
1537     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1538   }
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1543 {
1544   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1545   PetscErrorCode ierr;
1546   Vec            bb1 = NULL;
1547   PetscBool      hasop;
1548 
1549   PetscFunctionBegin;
1550   if (flag == SOR_APPLY_UPPER) {
1551     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552     PetscFunctionReturn(0);
1553   }
1554 
1555   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1556     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1557   }
1558 
1559   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1560     if (flag & SOR_ZERO_INITIAL_GUESS) {
1561       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1562       its--;
1563     }
1564 
1565     while (its--) {
1566       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568 
1569       /* update rhs: bb1 = bb - B*x */
1570       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1571       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1572 
1573       /* local sweep */
1574       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1575     }
1576   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1577     if (flag & SOR_ZERO_INITIAL_GUESS) {
1578       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1579       its--;
1580     }
1581     while (its--) {
1582       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584 
1585       /* update rhs: bb1 = bb - B*x */
1586       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1587       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1588 
1589       /* local sweep */
1590       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1591     }
1592   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1593     if (flag & SOR_ZERO_INITIAL_GUESS) {
1594       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1595       its--;
1596     }
1597     while (its--) {
1598       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1599       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600 
1601       /* update rhs: bb1 = bb - B*x */
1602       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1603       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1604 
1605       /* local sweep */
1606       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1607     }
1608   } else if (flag & SOR_EISENSTAT) {
1609     Vec xx1;
1610 
1611     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1612     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1613 
1614     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1615     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616     if (!mat->diag) {
1617       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1618       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1619     }
1620     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1621     if (hasop) {
1622       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1623     } else {
1624       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1625     }
1626     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1627 
1628     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1629 
1630     /* local sweep */
1631     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1632     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1633     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1634   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1635 
1636   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1637 
1638   matin->factorerrortype = mat->A->factorerrortype;
1639   PetscFunctionReturn(0);
1640 }
1641 
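/*
   Editor's note (a hedged usage sketch, not part of this source file): one way to exercise the
   local (processor-block) SOR implemented above; A, b, x are placeholders and omega is 1.0 here.

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   In practice this path is usually reached through PCSOR, for example with the options
   -pc_type sor -pc_sor_local_symmetric.
*/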
1642 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1643 {
1644   Mat            aA,aB,Aperm;
1645   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1646   PetscScalar    *aa,*ba;
1647   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1648   PetscSF        rowsf,sf;
1649   IS             parcolp = NULL;
1650   PetscBool      done;
1651   PetscErrorCode ierr;
1652 
1653   PetscFunctionBegin;
1654   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1655   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1656   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1657   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1658 
1659   /* Invert row permutation to find out where my rows should go */
1660   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1661   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1662   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1663   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1664   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1665   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1666 
1667   /* Invert column permutation to find out where my columns should go */
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1672   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1673   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1674   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1675 
1676   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1677   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1678   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1679 
1680   /* Find out where my gcols should go */
1681   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1682   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1683   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1684   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1685   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1686   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1687   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1688   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1689 
1690   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1691   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1692   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1693   for (i=0; i<m; i++) {
1694     PetscInt    row = rdest[i];
1695     PetscMPIInt rowner;
1696     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1697     for (j=ai[i]; j<ai[i+1]; j++) {
1698       PetscInt    col = cdest[aj[j]];
1699       PetscMPIInt cowner;
1700       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1701       if (rowner == cowner) dnnz[i]++;
1702       else onnz[i]++;
1703     }
1704     for (j=bi[i]; j<bi[i+1]; j++) {
1705       PetscInt    col = gcdest[bj[j]];
1706       PetscMPIInt cowner;
1707       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1708       if (rowner == cowner) dnnz[i]++;
1709       else onnz[i]++;
1710     }
1711   }
1712   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1713   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1714   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1715   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1716   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1717 
1718   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1719   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1720   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1721   for (i=0; i<m; i++) {
1722     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1723     PetscInt j0,rowlen;
1724     rowlen = ai[i+1] - ai[i];
1725     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the length of the repurposed work arrays), so insert in batches */
1726       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1727       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1728     }
1729     rowlen = bi[i+1] - bi[i];
1730     for (j0=j=0; j<rowlen; j0=j) {
1731       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1732       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1733     }
1734   }
1735   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1736   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1737   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1738   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1739   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1740   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1741   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1742   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1743   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1744   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1745   *B = Aperm;
1746   PetscFunctionReturn(0);
1747 }
1748 
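/*
   Editor's note (a hedged usage sketch, not part of this source file): calling the permutation
   routine above through the public interface. Here identity permutations are built from strided
   index sets just to show the calling sequence; rowp and colp would normally come from
   MatGetOrdering() or an application-supplied ordering.

      IS       rowp,colp;
      Mat      Aperm;
      PetscInt m,n,rstart,cstart;

      ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
      ierr = ISDestroy(&rowp);CHKERRQ(ierr);
      ierr = ISDestroy(&colp);CHKERRQ(ierr);
      ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/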
1749 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1750 {
1751   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1756   if (ghosts) *ghosts = aij->garray;
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1761 {
1762   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1763   Mat            A    = mat->A,B = mat->B;
1764   PetscErrorCode ierr;
1765   PetscLogDouble isend[5],irecv[5];
1766 
1767   PetscFunctionBegin;
1768   info->block_size = 1.0;
1769   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1770 
1771   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1772   isend[3] = info->memory;  isend[4] = info->mallocs;
1773 
1774   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1775 
1776   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1777   isend[3] += info->memory;  isend[4] += info->mallocs;
1778   if (flag == MAT_LOCAL) {
1779     info->nz_used      = isend[0];
1780     info->nz_allocated = isend[1];
1781     info->nz_unneeded  = isend[2];
1782     info->memory       = isend[3];
1783     info->mallocs      = isend[4];
1784   } else if (flag == MAT_GLOBAL_MAX) {
1785     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1786 
1787     info->nz_used      = irecv[0];
1788     info->nz_allocated = irecv[1];
1789     info->nz_unneeded  = irecv[2];
1790     info->memory       = irecv[3];
1791     info->mallocs      = irecv[4];
1792   } else if (flag == MAT_GLOBAL_SUM) {
1793     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1794 
1795     info->nz_used      = irecv[0];
1796     info->nz_allocated = irecv[1];
1797     info->nz_unneeded  = irecv[2];
1798     info->memory       = irecv[3];
1799     info->mallocs      = irecv[4];
1800   }
1801   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1802   info->fill_ratio_needed = 0;
1803   info->factor_mallocs    = 0;
1804   PetscFunctionReturn(0);
1805 }
1806 
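/*
   Editor's note (a hedged usage sketch, not part of this source file): querying the assembled
   statistics through the public interface; MAT_GLOBAL_SUM selects the MPI_SUM reduction above,
   MAT_GLOBAL_MAX the MPI_MAX reduction, and MAT_LOCAL returns the per-process totals.

      MatInfo info;

      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                         info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/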
1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1808 {
1809   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   switch (op) {
1814   case MAT_NEW_NONZERO_LOCATIONS:
1815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1816   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1817   case MAT_KEEP_NONZERO_PATTERN:
1818   case MAT_NEW_NONZERO_LOCATION_ERR:
1819   case MAT_USE_INODES:
1820   case MAT_IGNORE_ZERO_ENTRIES:
1821     MatCheckPreallocated(A,1);
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_ROW_ORIENTED:
1826     MatCheckPreallocated(A,1);
1827     a->roworiented = flg;
1828 
1829     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1830     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1831     break;
1832   case MAT_NEW_DIAGONALS:
1833   case MAT_SORTED_FULL:
1834     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1835     break;
1836   case MAT_IGNORE_OFF_PROC_ENTRIES:
1837     a->donotstash = flg;
1838     break;
1839   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1840   case MAT_SPD:
1841   case MAT_SYMMETRIC:
1842   case MAT_STRUCTURALLY_SYMMETRIC:
1843   case MAT_HERMITIAN:
1844   case MAT_SYMMETRY_ETERNAL:
1845     break;
1846   case MAT_SUBMAT_SINGLEIS:
1847     A->submat_singleis = flg;
1848     break;
1849   case MAT_STRUCTURE_ONLY:
1850     /* The option is handled directly by MatSetOption() */
1851     break;
1852   default:
1853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1854   }
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1859 {
1860   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1861   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1862   PetscErrorCode ierr;
1863   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1864   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1865   PetscInt       *cmap,*idx_p;
1866 
1867   PetscFunctionBegin;
1868   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1869   mat->getrowactive = PETSC_TRUE;
1870 
1871   if (!mat->rowvalues && (idx || v)) {
1872     /*
1873         allocate enough space to hold information from the longest row.
1874     */
1875     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1876     PetscInt   max = 1,tmp;
1877     for (i=0; i<matin->rmap->n; i++) {
1878       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1879       if (max < tmp) max = tmp;
1880     }
1881     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1882   }
1883 
1884   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1885   lrow = row - rstart;
1886 
1887   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1888   if (!v)   {pvA = NULL; pvB = NULL;}
1889   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1890   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1891   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1892   nztot = nzA + nzB;
1893 
1894   cmap = mat->garray;
1895   if (v  || idx) {
1896     if (nztot) {
1897       /* Sort by increasing column numbers, assuming A and B already sorted */
1898       PetscInt imark = -1;
1899       if (v) {
1900         *v = v_p = mat->rowvalues;
1901         for (i=0; i<nzB; i++) {
1902           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1903           else break;
1904         }
1905         imark = i;
1906         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1907         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1908       }
1909       if (idx) {
1910         *idx = idx_p = mat->rowindices;
1911         if (imark > -1) {
1912           for (i=0; i<imark; i++) {
1913             idx_p[i] = cmap[cworkB[i]];
1914           }
1915         } else {
1916           for (i=0; i<nzB; i++) {
1917             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1918             else break;
1919           }
1920           imark = i;
1921         }
1922         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1923         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1924       }
1925     } else {
1926       if (idx) *idx = NULL;
1927       if (v)   *v   = NULL;
1928     }
1929   }
1930   *nz  = nztot;
1931   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1932   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1937 {
1938   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1939 
1940   PetscFunctionBegin;
1941   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1942   aij->getrowactive = PETSC_FALSE;
1943   PetscFunctionReturn(0);
1944 }
1945 
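/*
   Editor's note (a hedged usage sketch, not part of this source file): the usual access pattern
   for the two routines above. Only locally owned rows may be requested, and every MatGetRow()
   must be paired with a MatRestoreRow() before the next row is fetched; A is a placeholder.

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use ncols, cols[], vals[] ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/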
1946 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1947 {
1948   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1949   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1950   PetscErrorCode ierr;
1951   PetscInt       i,j,cstart = mat->cmap->rstart;
1952   PetscReal      sum = 0.0;
1953   MatScalar      *v;
1954 
1955   PetscFunctionBegin;
1956   if (aij->size == 1) {
1957     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1958   } else {
1959     if (type == NORM_FROBENIUS) {
1960       v = amat->a;
1961       for (i=0; i<amat->nz; i++) {
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963       }
1964       v = bmat->a;
1965       for (i=0; i<bmat->nz; i++) {
1966         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1967       }
1968       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1969       *norm = PetscSqrtReal(*norm);
1970       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1971     } else if (type == NORM_1) { /* max column norm */
1972       PetscReal *tmp,*tmp2;
1973       PetscInt  *jj,*garray = aij->garray;
1974       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1975       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1976       *norm = 0.0;
1977       v     = amat->a; jj = amat->j;
1978       for (j=0; j<amat->nz; j++) {
1979         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1980       }
1981       v = bmat->a; jj = bmat->j;
1982       for (j=0; j<bmat->nz; j++) {
1983         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1984       }
1985       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1986       for (j=0; j<mat->cmap->N; j++) {
1987         if (tmp2[j] > *norm) *norm = tmp2[j];
1988       }
1989       ierr = PetscFree(tmp);CHKERRQ(ierr);
1990       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1991       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1992     } else if (type == NORM_INFINITY) { /* max row norm */
1993       PetscReal ntemp = 0.0;
1994       for (j=0; j<aij->A->rmap->n; j++) {
1995         v   = amat->a + amat->i[j];
1996         sum = 0.0;
1997         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1998           sum += PetscAbsScalar(*v); v++;
1999         }
2000         v = bmat->a + bmat->i[j];
2001         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2002           sum += PetscAbsScalar(*v); v++;
2003         }
2004         if (sum > ntemp) ntemp = sum;
2005       }
2006       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2007       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2008     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
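/*
   Editor's note (a hedged usage sketch, not part of this source file): the three norms supported
   by the routine above, requested through the public interface.

      PetscReal nrm;

      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);

   NORM_1 is the largest column sum of absolute values and NORM_INFINITY the largest row sum;
   as the error message above notes, the two norm is not supported by this implementation.
*/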
2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2014 {
2015   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2016   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2017   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2018   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2019   PetscErrorCode  ierr;
2020   Mat             B,A_diag,*B_diag;
2021   const MatScalar *array;
2022 
2023   PetscFunctionBegin;
2024   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2025   ai = Aloc->i; aj = Aloc->j;
2026   bi = Bloc->i; bj = Bloc->j;
2027   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2028     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2029     PetscSFNode          *oloc;
2030     PETSC_UNUSED PetscSF sf;
2031 
2032     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2033     /* compute d_nnz for preallocation */
2034     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2035     for (i=0; i<ai[ma]; i++) {
2036       d_nnz[aj[i]]++;
2037     }
2038     /* compute local off-diagonal contributions */
2039     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2040     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2041     /* map those to global */
2042     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2043     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2044     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2045     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2046     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2047     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2048     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2049 
2050     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2051     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2052     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2053     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2054     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2055     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2056   } else {
2057     B    = *matout;
2058     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2059   }
2060 
2061   b           = (Mat_MPIAIJ*)B->data;
2062   A_diag      = a->A;
2063   B_diag      = &b->A;
2064   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2065   A_diag_ncol = A_diag->cmap->N;
2066   B_diag_ilen = sub_B_diag->ilen;
2067   B_diag_i    = sub_B_diag->i;
2068 
2069   /* Set ilen for diagonal of B */
2070   for (i=0; i<A_diag_ncol; i++) {
2071     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2072   }
2073 
2074   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2075   very quickly (i.e., without using MatSetValues()), because all writes are local. */
2076   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2077 
2078   /* copy over the B part */
2079   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2080   array = Bloc->a;
2081   row   = A->rmap->rstart;
2082   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2083   cols_tmp = cols;
2084   for (i=0; i<mb; i++) {
2085     ncol = bi[i+1]-bi[i];
2086     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2087     row++;
2088     array += ncol; cols_tmp += ncol;
2089   }
2090   ierr = PetscFree(cols);CHKERRQ(ierr);
2091 
2092   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2093   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2094   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2095     *matout = B;
2096   } else {
2097     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2098   }
2099   PetscFunctionReturn(0);
2100 }
2101 
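/*
   Editor's note (a hedged usage sketch, not part of this source file): the reuse modes accepted
   by the routine above; A and At are placeholders.

      Mat At;

      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&At);CHKERRQ(ierr);

   MAT_REUSE_MATRIX assumes At came from a previous MAT_INITIAL_MATRIX call on a matrix with the
   same nonzero pattern; MAT_INPLACE_MATRIX replaces A by its transpose via MatHeaderMerge().
*/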
2102 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2103 {
2104   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2105   Mat            a    = aij->A,b = aij->B;
2106   PetscErrorCode ierr;
2107   PetscInt       s1,s2,s3;
2108 
2109   PetscFunctionBegin;
2110   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2111   if (rr) {
2112     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2113     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2114     /* Overlap communication with computation. */
2115     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2116   }
2117   if (ll) {
2118     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2119     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2120     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2121   }
2122   /* scale  the diagonal block */
2123   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2124 
2125   if (rr) {
2126     /* Do a scatter end and then right scale the off-diagonal block */
2127     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2128     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2129   }
2130   PetscFunctionReturn(0);
2131 }
2132 
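/*
   Editor's note (a hedged usage sketch, not part of this source file): scaling A in place as
   diag(l) A diag(r) through the public interface. The vectors must match the row and column
   layouts of A, which MatCreateVecs() guarantees; A, l, r are placeholders.

      Vec l,r;

      ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
      ierr = VecSet(l,2.0);CHKERRQ(ierr);
      ierr = VecSet(r,0.5);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
      ierr = VecDestroy(&l);CHKERRQ(ierr);
      ierr = VecDestroy(&r);CHKERRQ(ierr);
*/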
2133 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2134 {
2135   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2136   PetscErrorCode ierr;
2137 
2138   PetscFunctionBegin;
2139   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2144 {
2145   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2146   Mat            a,b,c,d;
2147   PetscBool      flg;
2148   PetscErrorCode ierr;
2149 
2150   PetscFunctionBegin;
2151   a = matA->A; b = matA->B;
2152   c = matB->A; d = matB->B;
2153 
2154   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2155   if (flg) {
2156     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2157   }
2158   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2163 {
2164   PetscErrorCode ierr;
2165   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2166   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2167 
2168   PetscFunctionBegin;
2169   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2170   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2171     /* because of the column compression in the off-processor part of the matrix a->B,
2172        the number of columns in a->B and b->B may be different, hence we cannot call
2173        MatCopy() directly on the two parts. If need be, a more efficient copy than
2174        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2175        and then copying the submatrices */
2176     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2177   } else {
2178     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2179     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2180   }
2181   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2186 {
2187   PetscErrorCode ierr;
2188 
2189   PetscFunctionBegin;
2190   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 /*
2195    Computes the number of nonzeros per row needed for preallocation when X and Y
2196    have different nonzero structure.
2197 */
2198 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2199 {
2200   PetscInt       i,j,k,nzx,nzy;
2201 
2202   PetscFunctionBegin;
2203   /* Set the number of nonzeros in the new matrix */
2204   for (i=0; i<m; i++) {
2205     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2206     nzx = xi[i+1] - xi[i];
2207     nzy = yi[i+1] - yi[i];
2208     nnz[i] = 0;
2209     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2210       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2211       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2212       nnz[i]++;
2213     }
2214     for (; k<nzy; k++) nnz[i]++;
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2220 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2221 {
2222   PetscErrorCode ierr;
2223   PetscInt       m = Y->rmap->N;
2224   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2225   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2226 
2227   PetscFunctionBegin;
2228   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2233 {
2234   PetscErrorCode ierr;
2235   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2236   PetscBLASInt   bnz,one=1;
2237   Mat_SeqAIJ     *x,*y;
2238 
2239   PetscFunctionBegin;
2240   if (str == SAME_NONZERO_PATTERN) {
2241     PetscScalar alpha = a;
2242     x    = (Mat_SeqAIJ*)xx->A->data;
2243     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2244     y    = (Mat_SeqAIJ*)yy->A->data;
2245     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2246     x    = (Mat_SeqAIJ*)xx->B->data;
2247     y    = (Mat_SeqAIJ*)yy->B->data;
2248     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2249     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2250     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2251     /* the MatAXPY_Basic* subroutines call MatAssembly(), so in those code paths the matrix
2252        on the GPU is updated there; here the CPU arrays were modified directly */
2253 #if defined(PETSC_HAVE_DEVICE)
2254     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2255       Y->offloadmask = PETSC_OFFLOAD_CPU;
2256     }
2257 #endif
2258   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2259     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2260   } else {
2261     Mat      B;
2262     PetscInt *nnz_d,*nnz_o;
2263     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2264     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2265     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2266     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2267     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2268     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2269     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2270     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2271     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2272     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2273     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2274     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2275     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
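/*
   Editor's note (a hedged usage sketch, not part of this source file): Y = Y + a*X through the
   public interface. The MatStructure flag selects the branch above: SAME_NONZERO_PATTERN uses the
   direct BLAS axpy on the local arrays, SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and
   DIFFERENT_NONZERO_PATTERN builds a freshly preallocated matrix and replaces Y's header.

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/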
2280 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2281 
2282 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2283 {
2284 #if defined(PETSC_USE_COMPLEX)
2285   PetscErrorCode ierr;
2286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2290   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2291 #else
2292   PetscFunctionBegin;
2293 #endif
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2298 {
2299   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2304   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2309 {
2310   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2315   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2320 {
2321   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2322   PetscErrorCode    ierr;
2323   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2324   PetscScalar       *va,*vv;
2325   Vec               vB,vA;
2326   const PetscScalar *vb;
2327 
2328   PetscFunctionBegin;
2329   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2330   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2331 
2332   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2333   if (idx) {
2334     for (i=0; i<m; i++) {
2335       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2336     }
2337   }
2338 
2339   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2340   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2341   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2342 
2343   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2344   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2345   for (i=0; i<m; i++) {
2346     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2347       vv[i] = vb[i];
2348       if (idx) idx[i] = a->garray[idxb[i]];
2349     } else {
2350       vv[i] = va[i];
2351       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2352         idx[i] = a->garray[idxb[i]];
2353     }
2354   }
2355   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2356   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2357   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2358   ierr = PetscFree(idxb);CHKERRQ(ierr);
2359   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2360   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
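/*
   Editor's note (a hedged usage sketch, not part of this source file): the largest entry in
   magnitude of each local row and, optionally, its global column index; A is a placeholder.

      Vec      rmax;
      PetscInt m,*loc;

      ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
      ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
      ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
      ierr = PetscFree(loc);CHKERRQ(ierr);
      ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/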
2364 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2365 {
2366   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2367   PetscInt       m = A->rmap->n,n = A->cmap->n;
2368   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2369   PetscInt       *cmap  = mat->garray;
2370   PetscInt       *diagIdx, *offdiagIdx;
2371   Vec            diagV, offdiagV;
2372   PetscScalar    *a, *diagA, *offdiagA, *ba;
2373   PetscInt       r,j,col,ncols,*bi,*bj;
2374   PetscErrorCode ierr;
2375   Mat            B = mat->B;
2376   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2377 
2378   PetscFunctionBegin;
2379   /* When one process holds the entire matrix A and the other processes have no entries */
2380   if (A->cmap->N == n) {
2381     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2382     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2383     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2384     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2385     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2386     PetscFunctionReturn(0);
2387   } else if (n == 0) {
2388     if (m) {
2389       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2390       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2391       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2392     }
2393     PetscFunctionReturn(0);
2394   }
2395 
2396   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2397   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2398   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2399   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2400 
2401   /* Get offdiagIdx[] for implicit 0.0 */
2402   ba = b->a;
2403   bi = b->i;
2404   bj = b->j;
2405   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2406   for (r = 0; r < m; r++) {
2407     ncols = bi[r+1] - bi[r];
2408     if (ncols == A->cmap->N - n) { /* Brow is dense */
2409       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2410     } else { /* Brow is sparse, so this row has at least one implicit 0.0, hence the off-diagonal minimum in magnitude is 0.0 */
2411       offdiagA[r] = 0.0;
2412 
2413       /* Find first hole in the cmap */
2414       for (j=0; j<ncols; j++) {
2415         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2416         if (col > j && j < cstart) {
2417           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2418           break;
2419         } else if (col > j + n && j >= cstart) {
2420           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2421           break;
2422         }
2423       }
2424       if (j == ncols && ncols < A->cmap->N - n) {
2425         /* a hole is outside compressed Bcols */
2426         if (ncols == 0) {
2427           if (cstart) {
2428             offdiagIdx[r] = 0;
2429           } else offdiagIdx[r] = cend;
2430         } else { /* ncols > 0 */
2431           offdiagIdx[r] = cmap[ncols-1] + 1;
2432           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2433         }
2434       }
2435     }
2436 
2437     for (j=0; j<ncols; j++) {
2438       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2439       ba++; bj++;
2440     }
2441   }
2442 
2443   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2444   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2445   for (r = 0; r < m; ++r) {
2446     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2447       a[r]   = diagA[r];
2448       if (idx) idx[r] = cstart + diagIdx[r];
2449     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2450       a[r] = diagA[r];
2451       if (idx) {
2452         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2453           idx[r] = cstart + diagIdx[r];
2454         } else idx[r] = offdiagIdx[r];
2455       }
2456     } else {
2457       a[r]   = offdiagA[r];
2458       if (idx) idx[r] = offdiagIdx[r];
2459     }
2460   }
2461   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2462   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2463   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2464   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2465   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2466   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2471 {
2472   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2473   PetscInt       m = A->rmap->n,n = A->cmap->n;
2474   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2475   PetscInt       *cmap  = mat->garray;
2476   PetscInt       *diagIdx, *offdiagIdx;
2477   Vec            diagV, offdiagV;
2478   PetscScalar    *a, *diagA, *offdiagA, *ba;
2479   PetscInt       r,j,col,ncols,*bi,*bj;
2480   PetscErrorCode ierr;
2481   Mat            B = mat->B;
2482   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2483 
2484   PetscFunctionBegin;
2485   /* When one process holds the entire matrix A and the other processes have no entries */
2486   if (A->cmap->N == n) {
2487     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2488     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2489     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2490     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2491     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2492     PetscFunctionReturn(0);
2493   } else if (n == 0) {
2494     if (m) {
2495       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2496       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2497       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2498     }
2499     PetscFunctionReturn(0);
2500   }
2501 
2502   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2503   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2504   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2505   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2506 
2507   /* Get offdiagIdx[] for implicit 0.0 */
2508   ba = b->a;
2509   bi = b->i;
2510   bj = b->j;
2511   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2512   for (r = 0; r < m; r++) {
2513     ncols = bi[r+1] - bi[r];
2514     if (ncols == A->cmap->N - n) { /* Brow is dense */
2515       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2516     } else { /* Brow is sparse, so this row has at least one implicit 0.0, hence the off-diagonal minimum is 0.0 or lower */
2517       offdiagA[r] = 0.0;
2518 
2519       /* Find first hole in the cmap */
2520       for (j=0; j<ncols; j++) {
2521         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2522         if (col > j && j < cstart) {
2523           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2524           break;
2525         } else if (col > j + n && j >= cstart) {
2526           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2527           break;
2528         }
2529       }
2530       if (j == ncols && ncols < A->cmap->N - n) {
2531         /* a hole is outside compressed Bcols */
2532         if (ncols == 0) {
2533           if (cstart) {
2534             offdiagIdx[r] = 0;
2535           } else offdiagIdx[r] = cend;
2536         } else { /* ncols > 0 */
2537           offdiagIdx[r] = cmap[ncols-1] + 1;
2538           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2539         }
2540       }
2541     }
2542 
2543     for (j=0; j<ncols; j++) {
2544       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2545       ba++; bj++;
2546     }
2547   }
2548 
2549   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2550   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2551   for (r = 0; r < m; ++r) {
2552     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2553       a[r]   = diagA[r];
2554       if (idx) idx[r] = cstart + diagIdx[r];
2555     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2556       a[r] = diagA[r];
2557       if (idx) {
2558         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2559           idx[r] = cstart + diagIdx[r];
2560         } else idx[r] = offdiagIdx[r];
2561       }
2562     } else {
2563       a[r]   = offdiagA[r];
2564       if (idx) idx[r] = offdiagIdx[r];
2565     }
2566   }
2567   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2568   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2569   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2570   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2571   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2572   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2577 {
2578   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2579   PetscInt       m = A->rmap->n,n = A->cmap->n;
2580   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2581   PetscInt       *cmap  = mat->garray;
2582   PetscInt       *diagIdx, *offdiagIdx;
2583   Vec            diagV, offdiagV;
2584   PetscScalar    *a, *diagA, *offdiagA, *ba;
2585   PetscInt       r,j,col,ncols,*bi,*bj;
2586   PetscErrorCode ierr;
2587   Mat            B = mat->B;
2588   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2589 
2590   PetscFunctionBegin;
2591   /* When one process holds the entire A and the other processes have no entries */
2592   if (A->cmap->N == n) {
2593     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2594     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2595     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2596     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2597     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2598     PetscFunctionReturn(0);
2599   } else if (n == 0) {
2600     if (m) {
2601       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2602       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2603       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2604     }
2605     PetscFunctionReturn(0);
2606   }
2607 
2608   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2609   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2610   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2611   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2612 
2613   /* Get offdiagIdx[] for implicit 0.0 */
2614   ba = b->a;
2615   bi = b->i;
2616   bj = b->j;
2617   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2618   for (r = 0; r < m; r++) {
2619     ncols = bi[r+1] - bi[r];
2620     if (ncols == A->cmap->N - n) { /* Brow is dense */
2621       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2622     } else { /* Brow is sparse so we already KNOW the maximum is 0.0 or higher */
2623       offdiagA[r] = 0.0;
2624 
2625       /* Find first hole in the cmap */
2626       for (j=0; j<ncols; j++) {
2627         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2628         if (col > j && j < cstart) {
2629           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2630           break;
2631         } else if (col > j + n && j >= cstart) {
2632           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2633           break;
2634         }
2635       }
2636       if (j == ncols && ncols < A->cmap->N - n) {
2637         /* a hole is outside compressed Bcols */
2638         if (ncols == 0) {
2639           if (cstart) {
2640             offdiagIdx[r] = 0;
2641           } else offdiagIdx[r] = cend;
2642         } else { /* ncols > 0 */
2643           offdiagIdx[r] = cmap[ncols-1] + 1;
2644           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2645         }
2646       }
2647     }
2648 
2649     for (j=0; j<ncols; j++) {
2650       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2651       ba++; bj++;
2652     }
2653   }
2654 
2655   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2656   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2657   for (r = 0; r < m; ++r) {
2658     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2659       a[r] = diagA[r];
2660       if (idx) idx[r] = cstart + diagIdx[r];
2661     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2662       a[r] = diagA[r];
2663       if (idx) {
2664         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2665           idx[r] = cstart + diagIdx[r];
2666         } else idx[r] = offdiagIdx[r];
2667       }
2668     } else {
2669       a[r] = offdiagA[r];
2670       if (idx) idx[r] = offdiagIdx[r];
2671     }
2672   }
2673   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2674   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2675   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2676   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2677   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2678   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2683 {
2684   PetscErrorCode ierr;
2685   Mat            *dummy;
2686 
2687   PetscFunctionBegin;
2688   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2689   *newmat = *dummy;
2690   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2695 {
2696   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2697   PetscErrorCode ierr;
2698 
2699   PetscFunctionBegin;
2700   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2701   A->factorerrortype = a->A->factorerrortype;
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2706 {
2707   PetscErrorCode ierr;
2708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2709 
2710   PetscFunctionBegin;
2711   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2712   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2713   if (x->assembled) {
2714     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2715   } else {
2716     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2717   }
2718   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2719   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2724 {
2725   PetscFunctionBegin;
2726   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2727   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 /*@
2732    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2733 
2734    Collective on Mat
2735 
2736    Input Parameters:
2737 +    A - the matrix
2738 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is to not use it)
2739 
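   Options Database Keys:
.    -mat_increase_overlap_scalable - use the scalable algorithm during MatIncreaseOverlap()

   Example usage (a minimal sketch; assumes A is an assembled MATMPIAIJ and is/ov are provided by the caller):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,1,&is,ov);CHKERRQ(ierr); /* now uses the scalable implementation */
.ve
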
2740  Level: advanced
2741 
2742 @*/
2743 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2744 {
2745   PetscErrorCode       ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2753 {
2754   PetscErrorCode       ierr;
2755   PetscBool            sc = PETSC_FALSE,flg;
2756 
2757   PetscFunctionBegin;
2758   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2759   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2760   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2761   if (flg) {
2762     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2763   }
2764   ierr = PetscOptionsTail();CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2769 {
2770   PetscErrorCode ierr;
2771   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2772   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2773 
2774   PetscFunctionBegin;
2775   if (!Y->preallocated) {
2776     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2777   } else if (!aij->nz) {
2778     PetscInt nonew = aij->nonew;
2779     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2780     aij->nonew = nonew;
2781   }
2782   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2787 {
2788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2793   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2794   if (d) {
2795     PetscInt rstart;
2796     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2797     *d += rstart;
2798 
2799   }
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2804 {
2805   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2806   PetscErrorCode ierr;
2807 
2808   PetscFunctionBegin;
2809   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 /* -------------------------------------------------------------------*/
2814 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2815                                        MatGetRow_MPIAIJ,
2816                                        MatRestoreRow_MPIAIJ,
2817                                        MatMult_MPIAIJ,
2818                                 /* 4*/ MatMultAdd_MPIAIJ,
2819                                        MatMultTranspose_MPIAIJ,
2820                                        MatMultTransposeAdd_MPIAIJ,
2821                                        NULL,
2822                                        NULL,
2823                                        NULL,
2824                                 /*10*/ NULL,
2825                                        NULL,
2826                                        NULL,
2827                                        MatSOR_MPIAIJ,
2828                                        MatTranspose_MPIAIJ,
2829                                 /*15*/ MatGetInfo_MPIAIJ,
2830                                        MatEqual_MPIAIJ,
2831                                        MatGetDiagonal_MPIAIJ,
2832                                        MatDiagonalScale_MPIAIJ,
2833                                        MatNorm_MPIAIJ,
2834                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2835                                        MatAssemblyEnd_MPIAIJ,
2836                                        MatSetOption_MPIAIJ,
2837                                        MatZeroEntries_MPIAIJ,
2838                                 /*24*/ MatZeroRows_MPIAIJ,
2839                                        NULL,
2840                                        NULL,
2841                                        NULL,
2842                                        NULL,
2843                                 /*29*/ MatSetUp_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        MatGetDiagonalBlock_MPIAIJ,
2847                                        NULL,
2848                                 /*34*/ MatDuplicate_MPIAIJ,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        NULL,
2853                                 /*39*/ MatAXPY_MPIAIJ,
2854                                        MatCreateSubMatrices_MPIAIJ,
2855                                        MatIncreaseOverlap_MPIAIJ,
2856                                        MatGetValues_MPIAIJ,
2857                                        MatCopy_MPIAIJ,
2858                                 /*44*/ MatGetRowMax_MPIAIJ,
2859                                        MatScale_MPIAIJ,
2860                                        MatShift_MPIAIJ,
2861                                        MatDiagonalSet_MPIAIJ,
2862                                        MatZeroRowsColumns_MPIAIJ,
2863                                 /*49*/ MatSetRandom_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        NULL,
2867                                        NULL,
2868                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2869                                        NULL,
2870                                        MatSetUnfactored_MPIAIJ,
2871                                        MatPermute_MPIAIJ,
2872                                        NULL,
2873                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2874                                        MatDestroy_MPIAIJ,
2875                                        MatView_MPIAIJ,
2876                                        NULL,
2877                                        NULL,
2878                                 /*64*/ NULL,
2879                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2880                                        NULL,
2881                                        NULL,
2882                                        NULL,
2883                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2884                                        MatGetRowMinAbs_MPIAIJ,
2885                                        NULL,
2886                                        NULL,
2887                                        NULL,
2888                                        NULL,
2889                                 /*75*/ MatFDColoringApply_AIJ,
2890                                        MatSetFromOptions_MPIAIJ,
2891                                        NULL,
2892                                        NULL,
2893                                        MatFindZeroDiagonals_MPIAIJ,
2894                                 /*80*/ NULL,
2895                                        NULL,
2896                                        NULL,
2897                                 /*83*/ MatLoad_MPIAIJ,
2898                                        MatIsSymmetric_MPIAIJ,
2899                                        NULL,
2900                                        NULL,
2901                                        NULL,
2902                                        NULL,
2903                                 /*89*/ NULL,
2904                                        NULL,
2905                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2906                                        NULL,
2907                                        NULL,
2908                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2909                                        NULL,
2910                                        NULL,
2911                                        NULL,
2912                                        MatBindToCPU_MPIAIJ,
2913                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2914                                        NULL,
2915                                        NULL,
2916                                        MatConjugate_MPIAIJ,
2917                                        NULL,
2918                                 /*104*/MatSetValuesRow_MPIAIJ,
2919                                        MatRealPart_MPIAIJ,
2920                                        MatImaginaryPart_MPIAIJ,
2921                                        NULL,
2922                                        NULL,
2923                                 /*109*/NULL,
2924                                        NULL,
2925                                        MatGetRowMin_MPIAIJ,
2926                                        NULL,
2927                                        MatMissingDiagonal_MPIAIJ,
2928                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2929                                        NULL,
2930                                        MatGetGhosts_MPIAIJ,
2931                                        NULL,
2932                                        NULL,
2933                                 /*119*/NULL,
2934                                        NULL,
2935                                        NULL,
2936                                        NULL,
2937                                        MatGetMultiProcBlock_MPIAIJ,
2938                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2939                                        MatGetColumnNorms_MPIAIJ,
2940                                        MatInvertBlockDiagonal_MPIAIJ,
2941                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2942                                        MatCreateSubMatricesMPI_MPIAIJ,
2943                                 /*129*/NULL,
2944                                        NULL,
2945                                        NULL,
2946                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2947                                        NULL,
2948                                 /*134*/NULL,
2949                                        NULL,
2950                                        NULL,
2951                                        NULL,
2952                                        NULL,
2953                                 /*139*/MatSetBlockSizes_MPIAIJ,
2954                                        NULL,
2955                                        NULL,
2956                                        MatFDColoringSetUp_MPIXAIJ,
2957                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2958                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2959                                 /*145*/NULL,
2960                                        NULL,
2961                                        NULL
2962 };
2963 
2964 /* ----------------------------------------------------------------------------------------*/
2965 
2966 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2967 {
2968   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2969   PetscErrorCode ierr;
2970 
2971   PetscFunctionBegin;
2972   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2973   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2974   PetscFunctionReturn(0);
2975 }
2976 
2977 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2978 {
2979   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2984   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 
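/* Implements the public MatMPIAIJSetPreallocation(); a typical user-level call (a sketch with
   illustrative values) is
     ierr = MatMPIAIJSetPreallocation(B,5,NULL,2,NULL);CHKERRQ(ierr);
   which preallocates 5 nonzeros per row for the diagonal block b->A and 2 per row for the
   off-diagonal block b->B */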
2988 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2989 {
2990   Mat_MPIAIJ     *b;
2991   PetscErrorCode ierr;
2992   PetscMPIInt    size;
2993 
2994   PetscFunctionBegin;
2995   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2996   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2997   b = (Mat_MPIAIJ*)B->data;
2998 
2999 #if defined(PETSC_USE_CTABLE)
3000   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3001 #else
3002   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3003 #endif
3004   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3005   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3006   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3007 
3008   /* Because B will have been resized we simply destroy it and create a new one each time */
3009   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3010   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3011   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3012   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
3013   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3014   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3015   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3016 
3017   if (!B->preallocated) {
3018     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3019     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3020     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3021     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3022     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3023   }
3024 
3025   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3026   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3027   B->preallocated  = PETSC_TRUE;
3028   B->was_assembled = PETSC_FALSE;
3029   B->assembled     = PETSC_FALSE;
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3034 {
3035   Mat_MPIAIJ     *b;
3036   PetscErrorCode ierr;
3037 
3038   PetscFunctionBegin;
3039   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3040   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3041   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3042   b = (Mat_MPIAIJ*)B->data;
3043 
3044 #if defined(PETSC_USE_CTABLE)
3045   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3046 #else
3047   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3048 #endif
3049   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3050   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3051   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3052 
3053   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3054   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3055   B->preallocated  = PETSC_TRUE;
3056   B->was_assembled = PETSC_FALSE;
3057   B->assembled = PETSC_FALSE;
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3062 {
3063   Mat            mat;
3064   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3065   PetscErrorCode ierr;
3066 
3067   PetscFunctionBegin;
3068   *newmat = NULL;
3069   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3070   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3071   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3072   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3073   a       = (Mat_MPIAIJ*)mat->data;
3074 
3075   mat->factortype   = matin->factortype;
3076   mat->assembled    = matin->assembled;
3077   mat->insertmode   = NOT_SET_VALUES;
3078   mat->preallocated = matin->preallocated;
3079 
3080   a->size         = oldmat->size;
3081   a->rank         = oldmat->rank;
3082   a->donotstash   = oldmat->donotstash;
3083   a->roworiented  = oldmat->roworiented;
3084   a->rowindices   = NULL;
3085   a->rowvalues    = NULL;
3086   a->getrowactive = PETSC_FALSE;
3087 
3088   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3089   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3090 
3091   if (oldmat->colmap) {
3092 #if defined(PETSC_USE_CTABLE)
3093     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3094 #else
3095     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3096     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3097     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3098 #endif
3099   } else a->colmap = NULL;
3100   if (oldmat->garray) {
3101     PetscInt len;
3102     len  = oldmat->B->cmap->n;
3103     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3104     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3105     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3106   } else a->garray = NULL;
3107 
3108   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3109      in fact, MatDuplicate() only requires the matrix to be preallocated.
3110      This can happen, for example, inside DMCreateMatrix_Shell() */
3111   if (oldmat->lvec) {
3112     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3113     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3114   }
3115   if (oldmat->Mvctx) {
3116     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3117     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3118   }
3119   if (oldmat->Mvctx_mpi1) {
3120     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3121     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3122   }
3123 
3124   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3125   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3126   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3127   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3128   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3129   *newmat = mat;
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3134 {
3135   PetscBool      isbinary, ishdf5;
3136   PetscErrorCode ierr;
3137 
3138   PetscFunctionBegin;
3139   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3140   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3141   /* force binary viewer to load .info file if it has not yet done so */
3142   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3143   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3144   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3145   if (isbinary) {
3146     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3147   } else if (ishdf5) {
3148 #if defined(PETSC_HAVE_HDF5)
3149     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3150 #else
3151     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3152 #endif
3153   } else {
3154     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3155   }
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3160 {
3161   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3162   PetscInt       *rowidxs,*colidxs;
3163   PetscScalar    *matvals;
3164   PetscErrorCode ierr;
3165 
3166   PetscFunctionBegin;
3167   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3168 
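       /* On-disk layout (PETSc binary format, as read below): header[4] = {MAT_FILE_CLASSID, M, N, nz},
          followed by the M row lengths, the nz column indices, and the nz values; PetscViewerBinaryReadAll()
          distributes these among processes according to the row ownership of mat */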
3169   /* read in matrix header */
3170   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3171   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3172   M  = header[1]; N = header[2]; nz = header[3];
3173   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3174   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3175   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3176 
3177   /* set block sizes from the viewer's .info file */
3178   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3179   /* set global sizes if not set already */
3180   if (mat->rmap->N < 0) mat->rmap->N = M;
3181   if (mat->cmap->N < 0) mat->cmap->N = N;
3182   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3183   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3184 
3185   /* check if the matrix sizes are correct */
3186   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3187   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3188 
3189   /* read in row lengths and build row indices */
3190   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3191   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3192   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3193   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3194   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3195   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3196   /* read in column indices and matrix values */
3197   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3198   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3199   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3200   /* store matrix indices and values */
3201   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3202   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3203   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 /* Not scalable because of ISAllGather() unless getting all columns. */
3208 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3209 {
3210   PetscErrorCode ierr;
3211   IS             iscol_local;
3212   PetscBool      isstride;
3213   PetscMPIInt    lisstride=0,gisstride;
3214 
3215   PetscFunctionBegin;
3216   /* check if we are grabbing all columns */
3217   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3218 
3219   if (isstride) {
3220     PetscInt  start,len,mstart,mlen;
3221     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3222     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3223     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3224     if (mstart == start && mlen-mstart == len) lisstride = 1;
3225   }
3226 
3227   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3228   if (gisstride) {
3229     PetscInt N;
3230     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3231     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3232     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3233     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3234   } else {
3235     PetscInt cbs;
3236     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3237     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3238     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3239   }
3240 
3241   *isseq = iscol_local;
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 /*
3246  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3247  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3248 
3249  Input Parameters:
3250    mat - matrix
3251    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3252            i.e., mat->rstart <= isrow[i] < mat->rend
3253    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3254            i.e., mat->cstart <= iscol[i] < mat->cend
3255  Output Parameters:
3256    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3257    iscol_o - sequential column index set for retrieving mat->B
3258    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3259  */
3260 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3261 {
3262   PetscErrorCode ierr;
3263   Vec            x,cmap;
3264   const PetscInt *is_idx;
3265   PetscScalar    *xarray,*cmaparray;
3266   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3267   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3268   Mat            B=a->B;
3269   Vec            lvec=a->lvec,lcmap;
3270   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3271   MPI_Comm       comm;
3272   VecScatter     Mvctx=a->Mvctx;
3273 
3274   PetscFunctionBegin;
3275   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3276   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3277 
3278   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3279   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3280   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3281   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3282   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3283 
3284   /* Get start indices */
3285   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3286   isstart -= ncols;
3287   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3288 
3289   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3290   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3291   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3292   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3293   for (i=0; i<ncols; i++) {
3294     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3295     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3296     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3297   }
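       /* Illustrative example (hypothetical values): with cstart=3, local iscol indices {3,5} and isstart=7,
          the loop above yields xarray = [3,-1,5,...], cmaparray = [7,-1,8,...] and idx = {0,2};
          entries left at -1 mark local columns that were not selected */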
3298   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3299   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3300   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3301 
3302   /* Get iscol_d */
3303   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3304   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3305   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3306 
3307   /* Get isrow_d */
3308   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3309   rstart = mat->rmap->rstart;
3310   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3311   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3312   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3313   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3314 
3315   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3316   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3317   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3318 
3319   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3320   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3321   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3322 
3323   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3324 
3325   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3326   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3327 
3328   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3329   /* off-process column indices */
3330   count = 0;
3331   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3332   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3333 
3334   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3335   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3336   for (i=0; i<Bn; i++) {
3337     if (PetscRealPart(xarray[i]) > -1.0) {
3338       idx[count]     = i;                   /* local column index in off-diagonal part B */
3339       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3340       count++;
3341     }
3342   }
3343   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3344   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3345 
3346   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3347   /* cannot ensure iscol_o has same blocksize as iscol! */
3348 
3349   ierr = PetscFree(idx);CHKERRQ(ierr);
3350   *garray = cmap1;
3351 
3352   ierr = VecDestroy(&x);CHKERRQ(ierr);
3353   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3354   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3355   PetscFunctionReturn(0);
3356 }
3357 
3358 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3359 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3360 {
3361   PetscErrorCode ierr;
3362   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3363   Mat            M = NULL;
3364   MPI_Comm       comm;
3365   IS             iscol_d,isrow_d,iscol_o;
3366   Mat            Asub = NULL,Bsub = NULL;
3367   PetscInt       n;
3368 
3369   PetscFunctionBegin;
3370   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3371 
3372   if (call == MAT_REUSE_MATRIX) {
3373     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3374     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3375     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3376 
3377     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3378     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3379 
3380     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3381     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3382 
3383     /* Update diagonal and off-diagonal portions of submat */
3384     asub = (Mat_MPIAIJ*)(*submat)->data;
3385     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3386     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3387     if (n) {
3388       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3389     }
3390     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3391     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3392 
3393   } else { /* call == MAT_INITIAL_MATRIX */
3394     const PetscInt *garray;
3395     PetscInt        BsubN;
3396 
3397     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3398     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3399 
3400     /* Create local submatrices Asub and Bsub */
3401     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3402     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3403 
3404     /* Create submatrix M */
3405     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3406 
3407     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3408     asub = (Mat_MPIAIJ*)M->data;
3409 
3410     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3411     n = asub->B->cmap->N;
3412     if (BsubN > n) {
3413       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3414       const PetscInt *idx;
3415       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3416       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3417 
3418       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3419       j = 0;
3420       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3421       for (i=0; i<n; i++) {
3422         if (j >= BsubN) break;
3423         while (subgarray[i] > garray[j]) j++;
3424 
3425         if (subgarray[i] == garray[j]) {
3426           idx_new[i] = idx[j++];
3427         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3428       }
3429       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3430 
3431       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3432       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3433 
3434     } else if (BsubN < n) {
3435       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3436     }
3437 
3438     ierr = PetscFree(garray);CHKERRQ(ierr);
3439     *submat = M;
3440 
3441     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3442     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3443     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3444 
3445     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3446     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3447 
3448     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3449     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3450   }
3451   PetscFunctionReturn(0);
3452 }
3453 
3454 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3455 {
3456   PetscErrorCode ierr;
3457   IS             iscol_local=NULL,isrow_d;
3458   PetscInt       csize;
3459   PetscInt       n,i,j,start,end;
3460   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3461   MPI_Comm       comm;
3462 
3463   PetscFunctionBegin;
3464   /* If isrow has same processor distribution as mat,
3465      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
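       /* Dispatch summary: if both isrow and iscol match mat's layout, use MatCreateSubMatrix_MPIAIJ_SameRowColDist();
          if only isrow matches (and the gathered iscol_local is sorted), use MatCreateSubMatrix_MPIAIJ_SameRowDist();
          otherwise gather iscol into iscol_local and fall back to MatCreateSubMatrix_MPIAIJ_nonscalable() */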
3466   if (call == MAT_REUSE_MATRIX) {
3467     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3468     if (isrow_d) {
3469       sameRowDist  = PETSC_TRUE;
3470       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3471     } else {
3472       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3473       if (iscol_local) {
3474         sameRowDist  = PETSC_TRUE;
3475         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3476       }
3477     }
3478   } else {
3479     /* Check if isrow has same processor distribution as mat */
3480     sameDist[0] = PETSC_FALSE;
3481     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3482     if (!n) {
3483       sameDist[0] = PETSC_TRUE;
3484     } else {
3485       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3486       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3487       if (i >= start && j < end) {
3488         sameDist[0] = PETSC_TRUE;
3489       }
3490     }
3491 
3492     /* Check if iscol has same processor distribution as mat */
3493     sameDist[1] = PETSC_FALSE;
3494     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3495     if (!n) {
3496       sameDist[1] = PETSC_TRUE;
3497     } else {
3498       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3499       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3500       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3501     }
3502 
3503     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3504     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3505     sameRowDist = tsameDist[0];
3506   }
3507 
3508   if (sameRowDist) {
3509     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3510       /* isrow and iscol have same processor distribution as mat */
3511       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3512       PetscFunctionReturn(0);
3513     } else { /* sameRowDist */
3514       /* isrow has same processor distribution as mat */
3515       if (call == MAT_INITIAL_MATRIX) {
3516         PetscBool sorted;
3517         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3518         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3519         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3520         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3521 
3522         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3523         if (sorted) {
3524           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3525           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3526           PetscFunctionReturn(0);
3527         }
3528       } else { /* call == MAT_REUSE_MATRIX */
3529         IS    iscol_sub;
3530         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3531         if (iscol_sub) {
3532           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3533           PetscFunctionReturn(0);
3534         }
3535       }
3536     }
3537   }
3538 
3539   /* General case: iscol -> iscol_local which has global size of iscol */
3540   if (call == MAT_REUSE_MATRIX) {
3541     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3542     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3543   } else {
3544     if (!iscol_local) {
3545       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3546     }
3547   }
3548 
3549   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3550   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3551 
3552   if (call == MAT_INITIAL_MATRIX) {
3553     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3554     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3555   }
3556   PetscFunctionReturn(0);
3557 }
3558 
3559 /*@C
3560      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3561          and "off-diagonal" part of the matrix in CSR format.
3562 
3563    Collective
3564 
3565    Input Parameters:
3566 +  comm - MPI communicator
3567 .  A - "diagonal" portion of matrix
3568 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3569 -  garray - global index of B columns
3570 
3571    Output Parameter:
3572 .   mat - the matrix, with input A as its local diagonal matrix
3573    Level: advanced
3574 
3575    Notes:
3576        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3577        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3578 
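   Example usage (a minimal sketch; Asub, Bsub and garray are assumed to come from a routine such as
   ISGetSeqIS_SameColDist_Private() followed by MatCreateSubMatrix_SeqAIJ(), as in MatCreateSubMatrix_MPIAIJ_SameRowColDist()):
.vb
   ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
   /* Asub and Bsub must not be used afterwards; M now owns Asub and Bsub has been destroyed */
.ve
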
3579 .seealso: MatCreateMPIAIJWithSplitArrays()
3580 @*/
3581 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3582 {
3583   PetscErrorCode ierr;
3584   Mat_MPIAIJ     *maij;
3585   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3586   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3587   PetscScalar    *oa=b->a;
3588   Mat            Bnew;
3589   PetscInt       m,n,N;
3590 
3591   PetscFunctionBegin;
3592   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3593   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3594   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3595   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3596   /* the check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3597   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3598 
3599   /* Get global columns of mat */
3600   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3601 
3602   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3603   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3604   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3605   maij = (Mat_MPIAIJ*)(*mat)->data;
3606 
3607   (*mat)->preallocated = PETSC_TRUE;
3608 
3609   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3610   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3611 
3612   /* Set A as diagonal portion of *mat */
3613   maij->A = A;
3614 
3615   nz = oi[m];
3616   for (i=0; i<nz; i++) {
3617     col   = oj[i];
3618     oj[i] = garray[col];
3619   }
3620 
3621    /* Set Bnew as off-diagonal portion of *mat */
3622   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3623   bnew        = (Mat_SeqAIJ*)Bnew->data;
3624   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3625   maij->B     = Bnew;
3626 
3627   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3628 
3629   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3630   b->free_a       = PETSC_FALSE;
3631   b->free_ij      = PETSC_FALSE;
3632   ierr = MatDestroy(&B);CHKERRQ(ierr);
3633 
3634   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3635   bnew->free_a       = PETSC_TRUE;
3636   bnew->free_ij      = PETSC_TRUE;
3637 
3638   /* condense columns of maij->B */
3639   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3640   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3642   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3643   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3644   PetscFunctionReturn(0);
3645 }
3646 
3647 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3648 
3649 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3650 {
3651   PetscErrorCode ierr;
3652   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3653   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3654   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3655   Mat            M,Msub,B=a->B;
3656   MatScalar      *aa;
3657   Mat_SeqAIJ     *aij;
3658   PetscInt       *garray = a->garray,*colsub,Ncols;
3659   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3660   IS             iscol_sub,iscmap;
3661   const PetscInt *is_idx,*cmap;
3662   PetscBool      allcolumns=PETSC_FALSE;
3663   MPI_Comm       comm;
3664 
3665   PetscFunctionBegin;
3666   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3667 
3668   if (call == MAT_REUSE_MATRIX) {
3669     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3670     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3671     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3672 
3673     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3674     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3675 
3676     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3677     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3678 
3679     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3680 
3681   } else { /* call == MAT_INITIAL_MATRIX */
3682     PetscBool flg;
3683 
3684     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3685     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3686 
3687     /* (1) iscol -> nonscalable iscol_local */
3688     /* Check for special case: each processor gets entire matrix columns */
3689     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3690     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3691     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3692     if (allcolumns) {
3693       iscol_sub = iscol_local;
3694       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3695       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3696 
3697     } else {
3698       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3699       PetscInt *idx,*cmap1,k;
3700       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3701       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3702       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3703       count = 0;
3704       k     = 0;
3705       for (i=0; i<Ncols; i++) {
3706         j = is_idx[i];
3707         if (j >= cstart && j < cend) {
3708           /* diagonal part of mat */
3709           idx[count]     = j;
3710           cmap1[count++] = i; /* column index in submat */
3711         } else if (Bn) {
3712           /* off-diagonal part of mat */
3713           if (j == garray[k]) {
3714             idx[count]     = j;
3715             cmap1[count++] = i;  /* column index in submat */
3716           } else if (j > garray[k]) {
3717             while (j > garray[k] && k < Bn-1) k++;
3718             if (j == garray[k]) {
3719               idx[count]     = j;
3720               cmap1[count++] = i; /* column index in submat */
3721             }
3722           }
3723         }
3724       }
3725       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3726 
3727       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3728       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3729       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3730 
3731       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3732     }
3733 
3734     /* (3) Create sequential Msub */
3735     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3736   }
3737 
3738   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3739   aij  = (Mat_SeqAIJ*)(Msub)->data;
3740   ii   = aij->i;
3741   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3742 
3743   /*
3744       m - number of local rows
3745       Ncols - number of columns (same on all processors)
3746       rstart - first row in new global matrix generated
3747   */
3748   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3749 
3750   if (call == MAT_INITIAL_MATRIX) {
3751     /* (4) Create parallel newmat */
3752     PetscMPIInt    rank,size;
3753     PetscInt       csize;
3754 
3755     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3756     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3757 
3758     /*
3759         Determine the number of non-zeros in the diagonal and off-diagonal
3760         portions of the matrix in order to do correct preallocation
3761     */
3762 
3763     /* first get start and end of "diagonal" columns */
3764     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3765     if (csize == PETSC_DECIDE) {
3766       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3767       if (mglobal == Ncols) { /* square matrix */
3768         nlocal = m;
3769       } else {
3770         nlocal = Ncols/size + ((Ncols % size) > rank);
3771       }
3772     } else {
3773       nlocal = csize;
3774     }
3775     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3776     rstart = rend - nlocal;
3777     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3778 
3779     /* next, compute all the lengths */
3780     jj    = aij->j;
3781     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3782     olens = dlens + m;
3783     for (i=0; i<m; i++) {
3784       jend = ii[i+1] - ii[i];
3785       olen = 0;
3786       dlen = 0;
3787       for (j=0; j<jend; j++) {
3788         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3789         else dlen++;
3790         jj++;
3791       }
3792       olens[i] = olen;
3793       dlens[i] = dlen;
3794     }
3795 
3796     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3797     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3798 
3799     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3800     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3801     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3802     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3803     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3804     ierr = PetscFree(dlens);CHKERRQ(ierr);
3805 
3806   } else { /* call == MAT_REUSE_MATRIX */
3807     M    = *newmat;
3808     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3809     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3810     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3811     /*
3812          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3813        rather than the slower MatSetValues().
3814     */
3815     M->was_assembled = PETSC_TRUE;
3816     M->assembled     = PETSC_FALSE;
3817   }
3818 
3819   /* (5) Set values of Msub to *newmat */
3820   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3821   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3822 
3823   jj   = aij->j;
3824   aa   = aij->a;
3825   for (i=0; i<m; i++) {
3826     row = rstart + i;
3827     nz  = ii[i+1] - ii[i];
3828     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3829     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3830     jj += nz; aa += nz;
3831   }
3832   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3833 
3834   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3835   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3836 
3837   ierr = PetscFree(colsub);CHKERRQ(ierr);
3838 
3839   /* save Msub, iscol_sub and iscmap used in processor for next request */
3840   if (call ==  MAT_INITIAL_MATRIX) {
3841     *newmat = M;
3842     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3843     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3844 
3845     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3846     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3847 
3848     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3849     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3850 
3851     if (iscol_local) {
3852       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3853       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3854     }
3855   }
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 /*
3860     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3861   on each process, and then the end result by concatenating the local matrices.
3862   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3863 
3864   Note: This requires a sequential iscol with all indices.
3865 */
3866 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3867 {
3868   PetscErrorCode ierr;
3869   PetscMPIInt    rank,size;
3870   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3871   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3872   Mat            M,Mreuse;
3873   MatScalar      *aa,*vwork;
3874   MPI_Comm       comm;
3875   Mat_SeqAIJ     *aij;
3876   PetscBool      colflag,allcolumns=PETSC_FALSE;
3877 
3878   PetscFunctionBegin;
3879   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3881   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3882 
3883   /* Check for special case: each processor gets entire matrix columns */
3884   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3885   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3886   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3887   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3888 
3889   if (call ==  MAT_REUSE_MATRIX) {
3890     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3891     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3892     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3893   } else {
3894     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3895   }
3896 
3897   /*
3898       m - number of local rows
3899       n - number of columns (same on all processors)
3900       rstart - first row in new global matrix generated
3901   */
3902   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3903   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3904   if (call == MAT_INITIAL_MATRIX) {
3905     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3906     ii  = aij->i;
3907     jj  = aij->j;
3908 
3909     /*
3910         Determine the number of non-zeros in the diagonal and off-diagonal
3911         portions of the matrix in order to do correct preallocation
3912     */
3913 
3914     /* first get start and end of "diagonal" columns */
3915     if (csize == PETSC_DECIDE) {
3916       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3917       if (mglobal == n) { /* square matrix */
3918         nlocal = m;
3919       } else {
3920         nlocal = n/size + ((n % size) > rank);
3921       }
3922     } else {
3923       nlocal = csize;
3924     }
3925     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3926     rstart = rend - nlocal;
3927     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3928 
3929     /* next, compute all the lengths */
3930     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3931     olens = dlens + m;
3932     for (i=0; i<m; i++) {
3933       jend = ii[i+1] - ii[i];
3934       olen = 0;
3935       dlen = 0;
3936       for (j=0; j<jend; j++) {
3937         if (*jj < rstart || *jj >= rend) olen++;
3938         else dlen++;
3939         jj++;
3940       }
3941       olens[i] = olen;
3942       dlens[i] = dlen;
3943     }
3944     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3945     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3946     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3947     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3948     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3949     ierr = PetscFree(dlens);CHKERRQ(ierr);
3950   } else {
3951     PetscInt ml,nl;
3952 
3953     M    = *newmat;
3954     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3955     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3956     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3957     /*
3958          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3959        rather than the slower MatSetValues().
3960     */
3961     M->was_assembled = PETSC_TRUE;
3962     M->assembled     = PETSC_FALSE;
3963   }
3964   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3965   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3966   ii   = aij->i;
3967   jj   = aij->j;
3968   aa   = aij->a;
3969   for (i=0; i<m; i++) {
3970     row   = rstart + i;
3971     nz    = ii[i+1] - ii[i];
3972     cwork = jj;     jj += nz;
3973     vwork = aa;     aa += nz;
3974     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3975   }
3976 
3977   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3978   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979   *newmat = M;
3980 
3981   /* save submatrix used in processor for next request */
3982   if (call ==  MAT_INITIAL_MATRIX) {
3983     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3984     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3985   }
3986   PetscFunctionReturn(0);
3987 }
3988 
3989 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3990 {
3991   PetscInt       m,cstart, cend,j,nnz,i,d;
3992   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3993   const PetscInt *JJ;
3994   PetscErrorCode ierr;
3995   PetscBool      nooffprocentries;
3996 
3997   PetscFunctionBegin;
3998   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3999 
4000   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
4001   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
4002   m      = B->rmap->n;
4003   cstart = B->cmap->rstart;
4004   cend   = B->cmap->rend;
4005   rstart = B->rmap->rstart;
4006 
4007   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
4008 
4009   if (PetscDefined(USE_DEBUG)) {
4010     for (i=0; i<m; i++) {
4011       nnz = Ii[i+1]- Ii[i];
4012       JJ  = J + Ii[i];
4013       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
4014       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
4015       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4016     }
4017   }
4018 
4019   for (i=0; i<m; i++) {
4020     nnz     = Ii[i+1]- Ii[i];
4021     JJ      = J + Ii[i];
4022     nnz_max = PetscMax(nnz_max,nnz);
4023     d       = 0;
4024     for (j=0; j<nnz; j++) {
4025       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4026     }
4027     d_nnz[i] = d;
4028     o_nnz[i] = nnz - d;
4029   }
4030   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4031   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4032 
4033   for (i=0; i<m; i++) {
4034     ii   = i + rstart;
4035     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4036   }
4037   nooffprocentries    = B->nooffprocentries;
4038   B->nooffprocentries = PETSC_TRUE;
4039   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4040   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4041   B->nooffprocentries = nooffprocentries;
4042 
4043   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4044   PetscFunctionReturn(0);
4045 }
4046 
4047 /*@
4048    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4049    (the default parallel PETSc format).
4050 
4051    Collective
4052 
4053    Input Parameters:
4054 +  B - the matrix
4055 .  i - the indices into j for the start of each local row (starts with zero)
4056 .  j - the column indices for each local row (starts with zero)
4057 -  v - optional values in the matrix
4058 
4059    Level: developer
4060 
4061    Notes:
4062        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4063      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4064      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4065 
4066        The i and j indices are 0 based, and the i indices refer to positions in the local j array.
4067 
4068        The format used for the sparse matrix input is equivalent to a
4069     row-major ordering, i.e., for the following matrix, the input data expected is
4070     as shown
4071 
4072 $        1 0 0
4073 $        2 0 3     P0
4074 $       -------
4075 $        4 5 6     P1
4076 $
4077 $     Process0 [P0]: rows_owned=[0,1]
4078 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4079 $        j =  {0,0,2}  [size = 3]
4080 $        v =  {1,2,3}  [size = 3]
4081 $
4082 $     Process1 [P1]: rows_owned=[2]
4083 $        i =  {0,3}    [size = nrow+1  = 1+1]
4084 $        j =  {0,1,2}  [size = 3]
4085 $        v =  {4,5,6}  [size = 3]
4086 
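        As a sketch only (assuming the i, j, and v arrays shown above have already been
     filled on each process; the variable names and the local sizes passed to
     MatSetSizes() are illustrative, not required by this routine):

     .vb
          Mat B;

          MatCreate(PETSC_COMM_WORLD,&B);
          MatSetSizes(B,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE);
          MatSetType(B,MATMPIAIJ);
          MatMPIAIJSetPreallocationCSR(B,i,j,v);
     .ve
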
4087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4088           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4089 @*/
4090 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4091 {
4092   PetscErrorCode ierr;
4093 
4094   PetscFunctionBegin;
4095   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098 
4099 /*@C
4100    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4101    (the default parallel PETSc format).  For good matrix assembly performance
4102    the user should preallocate the matrix storage by setting the parameters
4103    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4104    performance can be increased by more than a factor of 50.
4105 
4106    Collective
4107 
4108    Input Parameters:
4109 +  B - the matrix
4110 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4111            (same value is used for all local rows)
4112 .  d_nnz - array containing the number of nonzeros in the various rows of the
4113            DIAGONAL portion of the local submatrix (possibly different for each row)
4114            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4115            The size of this array is equal to the number of local rows, i.e 'm'.
4116            For matrices that will be factored, you must leave room for (and set)
4117            the diagonal entry even if it is zero.
4118 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4119            submatrix (same value is used for all local rows).
4120 -  o_nnz - array containing the number of nonzeros in the various rows of the
4121            OFF-DIAGONAL portion of the local submatrix (possibly different for
4122            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4123            structure. The size of this array is equal to the number
4124            of local rows, i.e 'm'.
4125 
4126    If the *_nnz parameter is given then the *_nz parameter is ignored.
4127 
4128    The AIJ format (also called the Yale sparse matrix format or
4129    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4130    storage.  The stored row and column indices begin with zero.
4131    See Users-Manual: ch_mat for details.
4132 
4133    The parallel matrix is partitioned such that the first m0 rows belong to
4134    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4135    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4136 
4137    The DIAGONAL portion of the local submatrix of a processor can be defined
4138    as the submatrix obtained by extracting the part corresponding to
4139    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4140    first row that belongs to the processor, r2 is the last row belonging to
4141    this processor, and c1-c2 is the range of indices of the local part of a
4142    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4143    common case of a square matrix, the row and column ranges are the same and
4144    the DIAGONAL part is also square. The remaining portion of the local
4145    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4146 
4147    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4148 
4149    You can call MatGetInfo() to get information on how effective the preallocation was;
4150    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4151    You can also run with the option -info and look for messages with the string
4152    malloc in them to see if additional memory allocation was needed.
4153 
4154    Example usage:
4155 
4156    Consider the following 8x8 matrix with 34 non-zero values that is
4157    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4158    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4159    as follows:
4160 
4161 .vb
4162             1  2  0  |  0  3  0  |  0  4
4163     Proc0   0  5  6  |  7  0  0  |  8  0
4164             9  0 10  | 11  0  0  | 12  0
4165     -------------------------------------
4166            13  0 14  | 15 16 17  |  0  0
4167     Proc1   0 18  0  | 19 20 21  |  0  0
4168             0  0  0  | 22 23  0  | 24  0
4169     -------------------------------------
4170     Proc2  25 26 27  |  0  0 28  | 29  0
4171            30  0  0  | 31 32 33  |  0 34
4172 .ve
4173 
4174    This can be represented as a collection of submatrices as:
4175 
4176 .vb
4177       A B C
4178       D E F
4179       G H I
4180 .ve
4181 
4182    Where the submatrices A,B,C are owned by proc0, D,E,F are
4183    owned by proc1, G,H,I are owned by proc2.
4184 
4185    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4186    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4187    The 'M','N' parameters are 8,8, and have the same values on all procs.
4188 
4189    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4190    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4191    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4192    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4193    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4194    matrix, and [DF] as another SeqAIJ matrix.
4195 
4196    When d_nz, o_nz parameters are specified, d_nz storage elements are
4197    allocated for every row of the local diagonal submatrix, and o_nz
4198    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4199    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4200    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4201    In this case, the values of d_nz,o_nz are:
4202 .vb
4203      proc0 : dnz = 2, o_nz = 2
4204      proc1 : dnz = 3, o_nz = 2
4205      proc2 : dnz = 1, o_nz = 4
4206 .ve
4207    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4208    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4209    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4210    34 values.
4211 
4212    When d_nnz, o_nnz parameters are specified, the storage is specified
4213    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4214    In the above case the values for d_nnz,o_nnz are:
4215 .vb
4216      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4217      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4218      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4219 .ve
4220    Here the space allocated is the sum of all the above values, i.e. 34, and
4221    hence the pre-allocation is perfect.
4222 
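        As a sketch of one possible call sequence for proc0 of the example above
     (the creation calls shown are an assumption about how B was set up, and the
     array names are illustrative only):

     .vb
          Mat      B;
          PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

          MatCreate(PETSC_COMM_WORLD,&B);
          MatSetSizes(B,3,3,8,8);
          MatSetType(B,MATMPIAIJ);
          MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
     .ve
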
4223    Level: intermediate
4224 
4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4226           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4227 @*/
4228 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4229 {
4230   PetscErrorCode ierr;
4231 
4232   PetscFunctionBegin;
4233   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4234   PetscValidType(B,1);
4235   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 /*@
4240      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4241          CSR format.
4242 
4243    Collective
4244 
4245    Input Parameters:
4246 +  comm - MPI communicator
4247 .  m - number of local rows (Cannot be PETSC_DECIDE)
4248 .  n - This value should be the same as the local size used in creating the
4249        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4250        it calculated if N is given). For square matrices n is almost always m.
4251 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4252 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4253 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4254 .   j - column indices
4255 -   a - matrix values
4256 
4257    Output Parameter:
4258 .   mat - the matrix
4259 
4260    Level: intermediate
4261 
4262    Notes:
4263        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4264      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4265      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4266 
4267        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4268 
4269        The format which is used for the sparse matrix input, is equivalent to a
4270     row-major ordering.. i.e for the following matrix, the input data expected is
4271     as shown
4272 
4273        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4274 
4275 $        1 0 0
4276 $        2 0 3     P0
4277 $       -------
4278 $        4 5 6     P1
4279 $
4280 $     Process0 [P0]: rows_owned=[0,1]
4281 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4282 $        j =  {0,0,2}  [size = 3]
4283 $        v =  {1,2,3}  [size = 3]
4284 $
4285 $     Process1 [P1]: rows_owned=[2]
4286 $        i =  {0,3}    [size = nrow+1  = 1+1]
4287 $        j =  {0,1,2}  [size = 3]
4288 $        v =  {4,5,6}  [size = 3]
4289 
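        As a sketch, using the two-process example above (on P0 the arrays i, j, and v are
     those listed for P0 and nrows_local is 2; on P1 they are those listed for P1 and
     nrows_local is 1; the variable names are illustrative only):

     .vb
          Mat mat;

          MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,nrows_local,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&mat);
     .ve
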
4290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4291           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4292 @*/
4293 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4294 {
4295   PetscErrorCode ierr;
4296 
4297   PetscFunctionBegin;
4298   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4299   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4300   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4301   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4302   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4303   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4304   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4305   PetscFunctionReturn(0);
4306 }
4307 
4308 /*@
4309      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4310          CSR format. Only the numerical values are updated; the other arrays (i and j) must be identical to those used when the matrix was created.
4311 
4312    Collective
4313 
4314    Input Parameters:
4315 +  mat - the matrix
4316 .  m - number of local rows (Cannot be PETSC_DECIDE)
4317 .  n - This value should be the same as the local size used in creating the
4318        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4319        it calculated if N is given). For square matrices n is almost always m.
4320 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4321 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4322 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4323 .  J - column indices
4324 -  v - matrix values
4325 
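        As a sketch of typical use (vnew stands for a new array of values with exactly
     the same sparsity pattern as v; all names here are illustrative only):

     .vb
          Mat mat;

          MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,M,N,i,j,v,&mat);
          MatUpdateMPIAIJWithArrays(mat,m,n,M,N,i,j,vnew);
     .ve
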
4326    Level: intermediate
4327 
4328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4329           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4330 @*/
4331 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4332 {
4333   PetscErrorCode ierr;
4334   PetscInt       cstart,nnz,i,j;
4335   PetscInt       *ld;
4336   PetscBool      nooffprocentries;
4337   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4338   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4339   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4340   const PetscInt *Adi = Ad->i;
4341   PetscInt       ldi,Iii,md;
4342 
4343   PetscFunctionBegin;
4344   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4345   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4346   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4347   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4348 
4349   cstart = mat->cmap->rstart;
4350   if (!Aij->ld) {
4351     /* count number of entries below block diagonal */
4352     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4353     Aij->ld = ld;
4354     for (i=0; i<m; i++) {
4355       nnz  = Ii[i+1]- Ii[i];
4356       j     = 0;
4357       while (j < nnz && J[j] < cstart) j++; /* count entries of this row that lie left of the diagonal block */
4358       J    += nnz;
4359       ld[i] = j;
4360     }
4361   } else {
4362     ld = Aij->ld;
4363   }
4364 
4365   for (i=0; i<m; i++) {
4366     nnz  = Ii[i+1]- Ii[i];
4367     Iii  = Ii[i];
4368     ldi  = ld[i];
4369     md   = Adi[i+1]-Adi[i];
4370     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4371     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4372     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4373     ad  += md;
4374     ao  += nnz - md;
4375   }
4376   nooffprocentries      = mat->nooffprocentries;
4377   mat->nooffprocentries = PETSC_TRUE;
4378   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4379   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4380   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4381   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   mat->nooffprocentries = nooffprocentries;
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 /*@C
4388    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4389    (the default parallel PETSc format).  For good matrix assembly performance
4390    the user should preallocate the matrix storage by setting the parameters
4391    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4392    performance can be increased by more than a factor of 50.
4393 
4394    Collective
4395 
4396    Input Parameters:
4397 +  comm - MPI communicator
4398 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4399            This value should be the same as the local size used in creating the
4400            y vector for the matrix-vector product y = Ax.
4401 .  n - This value should be the same as the local size used in creating the
4402        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4403        it calculated if N is given). For square matrices n is almost always m.
4404 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4405 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4406 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4407            (same value is used for all local rows)
4408 .  d_nnz - array containing the number of nonzeros in the various rows of the
4409            DIAGONAL portion of the local submatrix (possibly different for each row)
4410            or NULL, if d_nz is used to specify the nonzero structure.
4411            The size of this array is equal to the number of local rows, i.e 'm'.
4412 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4413            submatrix (same value is used for all local rows).
4414 -  o_nnz - array containing the number of nonzeros in the various rows of the
4415            OFF-DIAGONAL portion of the local submatrix (possibly different for
4416            each row) or NULL, if o_nz is used to specify the nonzero
4417            structure. The size of this array is equal to the number
4418            of local rows, i.e 'm'.
4419 
4420    Output Parameter:
4421 .  A - the matrix
4422 
4423    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4424    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4425    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4426 
4427    Notes:
4428    If the *_nnz parameter is given then the *_nz parameter is ignored
4429 
4430    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4431    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4432    storage requirements for this matrix.
4433 
4434    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4435    processor then it must be used on all processors that share the object for
4436    that argument.
4437 
4438    The user MUST specify either the local or global matrix dimensions
4439    (possibly both).
4440 
4441    The parallel matrix is partitioned across processors such that the
4442    first m0 rows belong to process 0, the next m1 rows belong to
4443    process 1, the next m2 rows belong to process 2, etc., where
4444    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4445    values corresponding to an [m x N] submatrix.
4446 
4447    The columns are logically partitioned with the n0 columns belonging
4448    to 0th partition, the next n1 columns belonging to the next
4449    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4450 
4451    The DIAGONAL portion of the local submatrix on any given processor
4452    is the submatrix formed by the rows and columns m,n that are
4453    owned by the given processor, i.e., the diagonal matrix on
4454    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4455    etc. The remaining portion of the local submatrix [m x (N-n)]
4456    constitutes the OFF-DIAGONAL portion. The example below better
4457    illustrates this concept.
4458 
4459    For a square global matrix we define each processor's diagonal portion
4460    to be its local rows and the corresponding columns (a square submatrix);
4461    each processor's off-diagonal portion encompasses the remainder of the
4462    local matrix (a rectangular submatrix).
4463 
4464    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4465 
4466    When calling this routine with a single process communicator, a matrix of
4467    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4468    type of communicator, use the construction mechanism
4469 .vb
4470      MatCreate(...,&A);
4471      MatSetType(A,MATMPIAIJ);
4472      MatSetSizes(A, m,n,M,N);
4473      MatMPIAIJSetPreallocation(A,...);
4474 .ve
4477 
4478    By default, this format uses inodes (identical nodes) when possible.
4479    We search for consecutive rows with the same nonzero structure, thereby
4480    reusing matrix information to achieve increased efficiency.
4481 
4482    Options Database Keys:
4483 +  -mat_no_inode  - Do not use inodes
4484 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4485 
4488    Example usage:
4489 
4490    Consider the following 8x8 matrix with 34 non-zero values that is
4491    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4492    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4493    as follows
4494 
4495 .vb
4496             1  2  0  |  0  3  0  |  0  4
4497     Proc0   0  5  6  |  7  0  0  |  8  0
4498             9  0 10  | 11  0  0  | 12  0
4499     -------------------------------------
4500            13  0 14  | 15 16 17  |  0  0
4501     Proc1   0 18  0  | 19 20 21  |  0  0
4502             0  0  0  | 22 23  0  | 24  0
4503     -------------------------------------
4504     Proc2  25 26 27  |  0  0 28  | 29  0
4505            30  0  0  | 31 32 33  |  0 34
4506 .ve
4507 
4508    This can be represented as a collection of submatrices as
4509 
4510 .vb
4511       A B C
4512       D E F
4513       G H I
4514 .ve
4515 
4516    Where the submatrices A,B,C are owned by proc0, D,E,F are
4517    owned by proc1, G,H,I are owned by proc2.
4518 
4519    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4520    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4521    The 'M','N' parameters are 8,8, and have the same values on all procs.
4522 
4523    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4524    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4525    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4526    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4527    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4528    matrix, and [DF] as another SeqAIJ matrix.
4529 
4530    When d_nz, o_nz parameters are specified, d_nz storage elements are
4531    allocated for every row of the local diagonal submatrix, and o_nz
4532    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4533    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4534    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4535    In this case, the values of d_nz,o_nz are
4536 .vb
4537      proc0 : dnz = 2, o_nz = 2
4538      proc1 : dnz = 3, o_nz = 2
4539      proc2 : dnz = 1, o_nz = 4
4540 .ve
4541    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4542    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4543    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4544    34 values.
4545 
4546    When d_nnz, o_nnz parameters are specified, the storage is specified
4547    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4548    In the above case the values for d_nnz,o_nnz are
4549 .vb
4550      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4551      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4552      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4553 .ve
4554    Here the space allocated is the sum of all the above values, i.e. 34, and
4555    hence the pre-allocation is perfect.
4556 
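        As a sketch, the call made on proc0 of the example above might look like the
     following (declarations and error checking omitted; the array names are
     illustrative only):

     .vb
          PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

          MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     .ve
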
4557    Level: intermediate
4558 
4559 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4560           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4561 @*/
4562 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4563 {
4564   PetscErrorCode ierr;
4565   PetscMPIInt    size;
4566 
4567   PetscFunctionBegin;
4568   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4569   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4570   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4571   if (size > 1) {
4572     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4573     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4574   } else {
4575     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4576     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4577   }
4578   PetscFunctionReturn(0);
4579 }
4580 
4581 /*@C
4582   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4583 
4584   Not collective
4585 
4586   Input Parameter:
4587 . A - The MPIAIJ matrix
4588 
4589   Output Parameters:
4590 + Ad - The local diagonal block as a SeqAIJ matrix
4591 . Ao - The local off-diagonal block as a SeqAIJ matrix
4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4593 
4594   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4595   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4596   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4597   local column numbers to global column numbers in the original matrix.
4598 
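       A minimal sketch of typical use (A is assumed to be an assembled MATMPIAIJ matrix):

     .vb
          Mat            Ad,Ao;
          const PetscInt *colmap;

          MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     .ve
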
4599   Level: intermediate
4600 
4601 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4602 @*/
4603 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4604 {
4605   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4606   PetscBool      flg;
4607   PetscErrorCode ierr;
4608 
4609   PetscFunctionBegin;
4610   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4611   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4612   if (Ad)     *Ad     = a->A;
4613   if (Ao)     *Ao     = a->B;
4614   if (colmap) *colmap = a->garray;
4615   PetscFunctionReturn(0);
4616 }
4617 
4618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4619 {
4620   PetscErrorCode ierr;
4621   PetscInt       m,N,i,rstart,nnz,Ii;
4622   PetscInt       *indx;
4623   PetscScalar    *values;
4624 
4625   PetscFunctionBegin;
4626   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4627   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4628     PetscInt       *dnz,*onz,sum,bs,cbs;
4629 
4630     if (n == PETSC_DECIDE) {
4631       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4632     }
4633     /* Check sum(n) = N */
4634     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4635     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4636 
4637     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4638     rstart -= m;
4639 
4640     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4641     for (i=0; i<m; i++) {
4642       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4643       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4644       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4645     }
4646 
4647     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4648     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4649     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4650     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4651     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4652     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4653     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4654     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4655   }
4656 
4657   /* numeric phase */
4658   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4659   for (i=0; i<m; i++) {
4660     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4661     Ii   = i + rstart;
4662     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4663     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4664   }
4665   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4666   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4667   PetscFunctionReturn(0);
4668 }
4669 
4670 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4671 {
4672   PetscErrorCode    ierr;
4673   PetscMPIInt       rank;
4674   PetscInt          m,N,i,rstart,nnz;
4675   size_t            len;
4676   const PetscInt    *indx;
4677   PetscViewer       out;
4678   char              *name;
4679   Mat               B;
4680   const PetscScalar *values;
4681 
4682   PetscFunctionBegin;
4683   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4684   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4685   /* Should this be the type of the diagonal block of A? */
4686   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4687   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4688   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4689   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4690   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4691   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4692   for (i=0; i<m; i++) {
4693     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4694     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4695     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4696   }
4697   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4698   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4699 
4700   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4701   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4702   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4703   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4704   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4705   ierr = PetscFree(name);CHKERRQ(ierr);
4706   ierr = MatView(B,out);CHKERRQ(ierr);
4707   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4708   ierr = MatDestroy(&B);CHKERRQ(ierr);
4709   PetscFunctionReturn(0);
4710 }
4711 
4712 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4713 {
4714   PetscErrorCode      ierr;
4715   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4716 
4717   PetscFunctionBegin;
4718   if (!merge) PetscFunctionReturn(0);
4719   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4720   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4721   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4722   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4723   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4724   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4725   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4726   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4727   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4728   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4729   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4730   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4731   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4732   ierr = PetscFree(merge);CHKERRQ(ierr);
4733   PetscFunctionReturn(0);
4734 }
4735 
4736 #include <../src/mat/utils/freespace.h>
4737 #include <petscbt.h>
4738 
4739 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4740 {
4741   PetscErrorCode      ierr;
4742   MPI_Comm            comm;
4743   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4744   PetscMPIInt         size,rank,taga,*len_s;
4745   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4746   PetscInt            proc,m;
4747   PetscInt            **buf_ri,**buf_rj;
4748   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4749   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4750   MPI_Request         *s_waits,*r_waits;
4751   MPI_Status          *status;
4752   MatScalar           *aa=a->a;
4753   MatScalar           **abuf_r,*ba_i;
4754   Mat_Merge_SeqsToMPI *merge;
4755   PetscContainer      container;
4756 
4757   PetscFunctionBegin;
4758   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4759   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4760 
4761   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4762   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4763 
4764   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4765   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4766   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4767 
4768   bi     = merge->bi;
4769   bj     = merge->bj;
4770   buf_ri = merge->buf_ri;
4771   buf_rj = merge->buf_rj;
4772 
4773   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4774   owners = merge->rowmap->range;
4775   len_s  = merge->len_s;
4776 
4777   /* send and recv matrix values */
4778   /*-----------------------------*/
4779   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4780   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4781 
4782   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4783   for (proc=0,k=0; proc<size; proc++) {
4784     if (!len_s[proc]) continue;
4785     i    = owners[proc];
4786     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4787     k++;
4788   }
4789 
4790   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4791   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4792   ierr = PetscFree(status);CHKERRQ(ierr);
4793 
4794   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4795   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4796 
4797   /* insert mat values of mpimat */
4798   /*----------------------------*/
4799   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4800   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4801 
4802   for (k=0; k<merge->nrecv; k++) {
4803     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4804     nrows       = *(buf_ri_k[k]);
4805     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4806     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4807   }
4808 
4809   /* set values of ba */
4810   m = merge->rowmap->n;
4811   for (i=0; i<m; i++) {
4812     arow = owners[rank] + i;
4813     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4814     bnzi = bi[i+1] - bi[i];
4815     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4816 
4817     /* add local non-zero vals of this proc's seqmat into ba */
4818     anzi   = ai[arow+1] - ai[arow];
4819     aj     = a->j + ai[arow];
4820     aa     = a->a + ai[arow];
4821     nextaj = 0;
4822     for (j=0; nextaj<anzi; j++) {
4823       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4824         ba_i[j] += aa[nextaj++];
4825       }
4826     }
4827 
4828     /* add received vals into ba */
4829     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4830       /* i-th row */
4831       if (i == *nextrow[k]) {
4832         anzi   = *(nextai[k]+1) - *nextai[k];
4833         aj     = buf_rj[k] + *(nextai[k]);
4834         aa     = abuf_r[k] + *(nextai[k]);
4835         nextaj = 0;
4836         for (j=0; nextaj<anzi; j++) {
4837           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4838             ba_i[j] += aa[nextaj++];
4839           }
4840         }
4841         nextrow[k]++; nextai[k]++;
4842       }
4843     }
4844     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4845   }
4846   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4847   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4848 
4849   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4850   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4851   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4852   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4853   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4854   PetscFunctionReturn(0);
4855 }
4856 
4857 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4858 {
4859   PetscErrorCode      ierr;
4860   Mat                 B_mpi;
4861   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4862   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4863   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4864   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4865   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4866   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4867   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4868   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4869   MPI_Status          *status;
4870   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4871   PetscBT             lnkbt;
4872   Mat_Merge_SeqsToMPI *merge;
4873   PetscContainer      container;
4874 
4875   PetscFunctionBegin;
4876   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4877 
4878   /* make sure it is a PETSc comm */
4879   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4881   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4882 
4883   ierr = PetscNew(&merge);CHKERRQ(ierr);
4884   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4885 
4886   /* determine row ownership */
4887   /*---------------------------------------------------------*/
4888   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4889   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4890   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4891   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4892   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4893   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4894   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4895 
4896   m      = merge->rowmap->n;
4897   owners = merge->rowmap->range;
4898 
4899   /* determine the number of messages to send, their lengths */
4900   /*---------------------------------------------------------*/
4901   len_s = merge->len_s;
4902 
4903   len          = 0; /* length of buf_si[] */
4904   merge->nsend = 0;
4905   for (proc=0; proc<size; proc++) {
4906     len_si[proc] = 0;
4907     if (proc == rank) {
4908       len_s[proc] = 0;
4909     } else {
4910       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4911       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4912     }
4913     if (len_s[proc]) {
4914       merge->nsend++;
4915       nrows = 0;
4916       for (i=owners[proc]; i<owners[proc+1]; i++) {
4917         if (ai[i+1] > ai[i]) nrows++;
4918       }
4919       len_si[proc] = 2*(nrows+1);
4920       len         += len_si[proc];
4921     }
4922   }
4923 
4924   /* determine the number and length of messages to receive for ij-structure */
4925   /*-------------------------------------------------------------------------*/
4926   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4927   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4928 
4929   /* post the Irecv of j-structure */
4930   /*-------------------------------*/
4931   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4932   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4933 
4934   /* post the Isend of j-structure */
4935   /*--------------------------------*/
4936   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4937 
4938   for (proc=0, k=0; proc<size; proc++) {
4939     if (!len_s[proc]) continue;
4940     i    = owners[proc];
4941     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4942     k++;
4943   }
4944 
4945   /* receives and sends of j-structure are complete */
4946   /*------------------------------------------------*/
4947   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4948   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4949 
4950   /* send and recv i-structure */
4951   /*---------------------------*/
4952   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4953   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4954 
4955   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4956   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4957   for (proc=0,k=0; proc<size; proc++) {
4958     if (!len_s[proc]) continue;
4959     /* form outgoing message for i-structure:
4960          buf_si[0]:                 nrows to be sent
4961                [1:nrows]:           row index (global)
4962                [nrows+1:2*nrows+1]: i-structure index
4963     */
4964     /*-------------------------------------------*/
4965     nrows       = len_si[proc]/2 - 1;
4966     buf_si_i    = buf_si + nrows+1;
4967     buf_si[0]   = nrows;
4968     buf_si_i[0] = 0;
4969     nrows       = 0;
4970     for (i=owners[proc]; i<owners[proc+1]; i++) {
4971       anzi = ai[i+1] - ai[i];
4972       if (anzi) {
4973         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4974         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4975         nrows++;
4976       }
4977     }
4978     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4979     k++;
4980     buf_si += len_si[proc];
4981   }
4982 
4983   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4984   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4985 
4986   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4987   for (i=0; i<merge->nrecv; i++) {
4988     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4989   }
4990 
4991   ierr = PetscFree(len_si);CHKERRQ(ierr);
4992   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4993   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4994   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4995   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4996   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4997   ierr = PetscFree(status);CHKERRQ(ierr);
4998 
4999   /* compute a local seq matrix in each processor */
5000   /*----------------------------------------------*/
5001   /* allocate bi array and free space for accumulating nonzero column info */
5002   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
5003   bi[0] = 0;
5004 
5005   /* create and initialize a linked list */
5006   nlnk = N+1;
5007   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5008 
5009   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5010   len  = ai[owners[rank+1]] - ai[owners[rank]];
5011   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
5012 
5013   current_space = free_space;
5014 
5015   /* determine symbolic info for each local row */
5016   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5017 
5018   for (k=0; k<merge->nrecv; k++) {
5019     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
5020     nrows       = *buf_ri_k[k];
5021     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
5022     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
5023   }
5024 
5025   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5026   len  = 0;
5027   for (i=0; i<m; i++) {
5028     bnzi = 0;
5029     /* add local non-zero cols of this proc's seqmat into lnk */
5030     arow  = owners[rank] + i;
5031     anzi  = ai[arow+1] - ai[arow];
5032     aj    = a->j + ai[arow];
5033     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5034     bnzi += nlnk;
5035     /* add received col data into lnk */
5036     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5037       if (i == *nextrow[k]) { /* i-th row */
5038         anzi  = *(nextai[k]+1) - *nextai[k];
5039         aj    = buf_rj[k] + *nextai[k];
5040         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5041         bnzi += nlnk;
5042         nextrow[k]++; nextai[k]++;
5043       }
5044     }
5045     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5046 
5047     /* if free space is not available, make more free space */
5048     if (current_space->local_remaining<bnzi) {
5049       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5050       nspacedouble++;
5051     }
5052     /* copy data into free space, then initialize lnk */
5053     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5054     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5055 
5056     current_space->array           += bnzi;
5057     current_space->local_used      += bnzi;
5058     current_space->local_remaining -= bnzi;
5059 
5060     bi[i+1] = bi[i] + bnzi;
5061   }
5062 
5063   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5064 
5065   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5066   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5067   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5068 
5069   /* create symbolic parallel matrix B_mpi */
5070   /*---------------------------------------*/
5071   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5072   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5073   if (n==PETSC_DECIDE) {
5074     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5075   } else {
5076     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5077   }
5078   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5079   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5080   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5081   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5082   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5083 
5084   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5085   B_mpi->assembled  = PETSC_FALSE;
5086   merge->bi         = bi;
5087   merge->bj         = bj;
5088   merge->buf_ri     = buf_ri;
5089   merge->buf_rj     = buf_rj;
5090   merge->coi        = NULL;
5091   merge->coj        = NULL;
5092   merge->owners_co  = NULL;
5093 
5094   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5095 
5096   /* attach the supporting struct to B_mpi for reuse */
5097   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5098   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5099   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5100   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5101   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5102   *mpimat = B_mpi;
5103 
5104   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5105   PetscFunctionReturn(0);
5106 }
5107 
5108 /*@C
5109       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5110                  matrices from each processor
5111 
5112     Collective
5113 
5114    Input Parameters:
5115 +    comm - the communicator the parallel matrix will live on
5116 .    seqmat - the input sequential matrix on each process
5117 .    m - number of local rows (or PETSC_DECIDE)
5118 .    n - number of local columns (or PETSC_DECIDE)
5119 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5120 
5121    Output Parameter:
5122 .    mpimat - the parallel matrix generated
5123 
5124     Level: advanced
5125 
5126    Notes:
5127      The dimensions of the sequential matrix on each process MUST be the same.
5128      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5129      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
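
     A minimal usage sketch (assuming each rank has already assembled its own SeqAIJ matrix seqmat):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... after changing only the numerical values of seqmat, rebuild the sum in place ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
.ve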
5130 @*/
5131 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5132 {
5133   PetscErrorCode ierr;
5134   PetscMPIInt    size;
5135 
5136   PetscFunctionBegin;
5137   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5138   if (size == 1) {
5139     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5140     if (scall == MAT_INITIAL_MATRIX) {
5141       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5142     } else {
5143       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5144     }
5145     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5146     PetscFunctionReturn(0);
5147   }
5148   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5149   if (scall == MAT_INITIAL_MATRIX) {
5150     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5151   }
5152   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5153   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5154   PetscFunctionReturn(0);
5155 }
5156 
5157 /*@
5158     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5159          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5160          with MatGetSize().
5161 
5162     Not Collective
5163 
5164    Input Parameters:
5165 +    A - the matrix
5166 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5167 
5168    Output Parameter:
5169 .    A_loc - the local sequential matrix generated
5170 
5171     Level: developer
5172 
5173    Notes:
5174      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5175      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5176      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5177      modify the values of the returned A_loc.
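
     A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... work with the local rows of A as a sequential matrix ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh the values after A changes */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve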
5178 
5179 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5180 
5181 @*/
5182 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5183 {
5184   PetscErrorCode ierr;
5185   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5186   Mat_SeqAIJ     *mat,*a,*b;
5187   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5188   MatScalar      *aa,*ba,*cam;
5189   PetscScalar    *ca;
5190   PetscMPIInt    size;
5191   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5192   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5193   PetscBool      match;
5194 
5195   PetscFunctionBegin;
5196   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5197   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5198   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5199   if (size == 1) {
5200     if (scall == MAT_INITIAL_MATRIX) {
5201       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5202       *A_loc = mpimat->A;
5203     } else if (scall == MAT_REUSE_MATRIX) {
5204       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5205     }
5206     PetscFunctionReturn(0);
5207   }
5208 
5209   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5210   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5211   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5212   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5213   aa = a->a; ba = b->a;
5214   if (scall == MAT_INITIAL_MATRIX) {
5215     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5216     ci[0] = 0;
5217     for (i=0; i<am; i++) {
5218       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5219     }
5220     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5221     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5222     k    = 0;
5223     for (i=0; i<am; i++) {
5224       ncols_o = bi[i+1] - bi[i];
5225       ncols_d = ai[i+1] - ai[i];
5226       /* off-diagonal portion of A */
5227       for (jo=0; jo<ncols_o; jo++) {
5228         col = cmap[*bj];
5229         if (col >= cstart) break;
5230         cj[k]   = col; bj++;
5231         ca[k++] = *ba++;
5232       }
5233       /* diagonal portion of A */
5234       for (j=0; j<ncols_d; j++) {
5235         cj[k]   = cstart + *aj++;
5236         ca[k++] = *aa++;
5237       }
5238       /* off-diagonal portion of A */
5239       for (j=jo; j<ncols_o; j++) {
5240         cj[k]   = cmap[*bj++];
5241         ca[k++] = *ba++;
5242       }
5243     }
5244     /* put together the new matrix */
5245     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5246     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5247     /* Since these are PETSc arrays, change flags to free them as necessary. */
5248     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5249     mat->free_a  = PETSC_TRUE;
5250     mat->free_ij = PETSC_TRUE;
5251     mat->nonew   = 0;
5252   } else if (scall == MAT_REUSE_MATRIX) {
5253     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5254     ci = mat->i; cj = mat->j; cam = mat->a;
5255     for (i=0; i<am; i++) {
5256       /* off-diagonal portion of A */
5257       ncols_o = bi[i+1] - bi[i];
5258       for (jo=0; jo<ncols_o; jo++) {
5259         col = cmap[*bj];
5260         if (col >= cstart) break;
5261         *cam++ = *ba++; bj++;
5262       }
5263       /* diagonal portion of A */
5264       ncols_d = ai[i+1] - ai[i];
5265       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5266       /* off-diagonal portion of A */
5267       for (j=jo; j<ncols_o; j++) {
5268         *cam++ = *ba++; bj++;
5269       }
5270     }
5271   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5272   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5273   PetscFunctionReturn(0);
5274 }
5275 
5276 /*@C
5277     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and its NON-ZERO columns
5278 
5279     Not Collective
5280 
5281    Input Parameters:
5282 +    A - the matrix
5283 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5284 -    row, col - index sets of rows and columns to extract (or NULL)
5285 
5286    Output Parameter:
5287 .    A_loc - the local sequential matrix generated
5288 
5289     Level: developer
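
     A minimal usage sketch (passing NULL for both row and col extracts all local rows and only the nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve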
5290 
5291 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5292 
5293 @*/
5294 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5295 {
5296   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5297   PetscErrorCode ierr;
5298   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5299   IS             isrowa,iscola;
5300   Mat            *aloc;
5301   PetscBool      match;
5302 
5303   PetscFunctionBegin;
5304   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5305   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5306   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5307   if (!row) {
5308     start = A->rmap->rstart; end = A->rmap->rend;
5309     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5310   } else {
5311     isrowa = *row;
5312   }
5313   if (!col) {
5314     start = A->cmap->rstart;
5315     cmap  = a->garray;
5316     nzA   = a->A->cmap->n;
5317     nzB   = a->B->cmap->n;
5318     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5319     ncols = 0;
5320     for (i=0; i<nzB; i++) {
5321       if (cmap[i] < start) idx[ncols++] = cmap[i];
5322       else break;
5323     }
5324     imark = i;
5325     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5326     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5327     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5328   } else {
5329     iscola = *col;
5330   }
5331   if (scall != MAT_INITIAL_MATRIX) {
5332     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5333     aloc[0] = *A_loc;
5334   }
5335   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5336   if (!col) { /* attach global id of condensed columns */
5337     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5338   }
5339   *A_loc = aloc[0];
5340   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5341   if (!row) {
5342     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5343   }
5344   if (!col) {
5345     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5346   }
5347   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5348   PetscFunctionReturn(0);
5349 }
5350 
5351 /*
5352  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5353  * Rows can be local or remote. The routine is designed to be memory scalable, so nothing is based
5354  * on a global size.
5355  * */
5356 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5357 {
5358   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5359   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5360   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5361   PetscMPIInt              owner;
5362   PetscSFNode              *iremote,*oiremote;
5363   const PetscInt           *lrowindices;
5364   PetscErrorCode           ierr;
5365   PetscSF                  sf,osf;
5366   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5367   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5368   MPI_Comm                 comm;
5369   ISLocalToGlobalMapping   mapping;
5370 
5371   PetscFunctionBegin;
5372   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5373   /* plocalsize is the number of roots
5374    * nrows is the number of leaves
5375    * */
5376   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5377   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5378   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5379   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5380   for (i=0;i<nrows;i++) {
5381     /* Find a remote index and an owner for a row
5382      * The row could be local or remote
5383      * */
5384     owner = 0;
5385     lidx  = 0;
5386     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5387     iremote[i].index = lidx;
5388     iremote[i].rank  = owner;
5389   }
5390   /* Create SF to communicate how many nonzero columns for each row */
5391   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5392   /* SF will figure out the number of nonzero columns for each row, and their
5393    * offsets
5394    * */
5395   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5396   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5397   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5398 
5399   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5400   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5401   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5402   roffsets[0] = 0;
5403   roffsets[1] = 0;
5404   for (i=0;i<plocalsize;i++) {
5405     /* diag */
5406     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5407     /* off diag */
5408     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5409     /* compute offsets so that we know the relative location for each row */
5410     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5411     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5412   }
5413   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5414   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5415   /* 'r' means root, and 'l' means leaf */
5416   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5417   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5418   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5419   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5420   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5421   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5422   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5423   dntotalcols = 0;
5424   ontotalcols = 0;
5425   ncol = 0;
5426   for (i=0;i<nrows;i++) {
5427     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5428     ncol = PetscMax(pnnz[i],ncol);
5429     /* diag */
5430     dntotalcols += nlcols[i*2+0];
5431     /* off diag */
5432     ontotalcols += nlcols[i*2+1];
5433   }
5434   /* We do not need to figure out the right number of columns
5435    * since all the calculations will be done by going through the raw data
5436    * */
5437   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5438   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5439   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5440   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5441   /* diag */
5442   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5443   /* off diag */
5444   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5445   /* diag */
5446   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5447   /* off diag */
5448   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5449   dntotalcols = 0;
5450   ontotalcols = 0;
5451   ntotalcols  = 0;
5452   for (i=0;i<nrows;i++) {
5453     owner = 0;
5454     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5455     /* Set iremote for diag matrix */
5456     for (j=0;j<nlcols[i*2+0];j++) {
5457       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5458       iremote[dntotalcols].rank    = owner;
5459       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5460       ilocal[dntotalcols++]        = ntotalcols++;
5461     }
5462     /* off diag */
5463     for (j=0;j<nlcols[i*2+1];j++) {
5464       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5465       oiremote[ontotalcols].rank    = owner;
5466       oilocal[ontotalcols++]        = ntotalcols++;
5467     }
5468   }
5469   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5470   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5471   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5472   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5473   /* P serves as the roots and P_oth as the leaves
5474    * Diag matrix
5475    * */
5476   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5477   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5478   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5479 
5480   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5481   /* Off diag */
5482   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5483   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5484   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5485   /* We operate on the matrix internal data for saving memory */
5486   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5487   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5488   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5489   /* Convert to global indices for diag matrix */
5490   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5491   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5492   /* We want P_oth to store global indices */
5493   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5494   /* Use memory scalable approach */
5495   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5496   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5497   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5498   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5499   /* Convert back to local indices */
5500   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5501   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5502   nout = 0;
5503   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5504   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5505   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5506   /* Exchange values */
5507   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5508   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5509   /* Stop PETSc from shrinking memory */
5510   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5511   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5512   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5513   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5514   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5515   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5516   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5517   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5518   PetscFunctionReturn(0);
5519 }
5520 
5521 /*
5522  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A.
5523  * This supports MPIAIJ and MAIJ.
5524  * */
5525 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5526 {
5527   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5528   Mat_SeqAIJ            *p_oth;
5529   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5530   IS                    rows,map;
5531   PetscHMapI            hamp;
5532   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5533   MPI_Comm              comm;
5534   PetscSF               sf,osf;
5535   PetscBool             has;
5536   PetscErrorCode        ierr;
5537 
5538   PetscFunctionBegin;
5539   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5540   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5541   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5542    *  and then create a submatrix (that often is an overlapping matrix)
5543    * */
5544   if (reuse == MAT_INITIAL_MATRIX) {
5545     /* Use a hash table to figure out unique keys */
5546     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5547     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5548     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5549     count = 0;
5550     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5551     for (i=0;i<a->B->cmap->n;i++) {
5552       key  = a->garray[i]/dof;
5553       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5554       if (!has) {
5555         mapping[i] = count;
5556         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5557       } else {
5558         /* Current 'i' has the same value as in the previous step */
5559         mapping[i] = count-1;
5560       }
5561     }
5562     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5563     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5564     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5565     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5566     off = 0;
5567     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5568     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5569     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5570     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5571     /* In case the matrix was already created and the user wants to recreate it */
5572     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5573     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5574     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5575     ierr = ISDestroy(&map);CHKERRQ(ierr);
5576     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5577   } else if (reuse == MAT_REUSE_MATRIX) {
5578     /* If the matrix was already created, we simply update the values using the SF objects
5579      * that were attached to the matrix earlier.
5580      *  */
5581     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5582     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5583     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5584     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5585     /* Update values in place */
5586     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5587     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5588     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5589     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5590   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5591   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5592   PetscFunctionReturn(0);
5593 }
5594 
5595 /*@C
5596     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5597 
5598     Collective on Mat
5599 
5600    Input Parameters:
5601 +    A,B - the matrices in mpiaij format
5602 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5603 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5604 
5605    Output Parameters:
5606 +    rowb, colb - index sets of rows and columns of B to extract
5607 -    B_seq - the sequential matrix generated
5608 
5609     Level: developer
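
     A minimal usage sketch (the index sets created with MAT_INITIAL_MATRIX are returned so they can be reused later):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... after the values of B change ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve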
5610 
5611 @*/
5612 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5613 {
5614   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5615   PetscErrorCode ierr;
5616   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5617   IS             isrowb,iscolb;
5618   Mat            *bseq=NULL;
5619 
5620   PetscFunctionBegin;
5621   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5622     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5623   }
5624   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5625 
5626   if (scall == MAT_INITIAL_MATRIX) {
5627     start = A->cmap->rstart;
5628     cmap  = a->garray;
5629     nzA   = a->A->cmap->n;
5630     nzB   = a->B->cmap->n;
5631     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5632     ncols = 0;
5633     for (i=0; i<nzB; i++) {  /* row < local row index */
5634       if (cmap[i] < start) idx[ncols++] = cmap[i];
5635       else break;
5636     }
5637     imark = i;
5638     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5639     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5640     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5641     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5642   } else {
5643     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5644     isrowb  = *rowb; iscolb = *colb;
5645     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5646     bseq[0] = *B_seq;
5647   }
5648   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5649   *B_seq = bseq[0];
5650   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5651   if (!rowb) {
5652     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5653   } else {
5654     *rowb = isrowb;
5655   }
5656   if (!colb) {
5657     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5658   } else {
5659     *colb = iscolb;
5660   }
5661   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5662   PetscFunctionReturn(0);
5663 }
5664 
5665 /*
5666     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5667     of the OFF-DIAGONAL portion of local A
5668 
5669     Collective on Mat
5670 
5671    Input Parameters:
5672 +    A,B - the matrices in mpiaij format
5673 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5674 
5675    Output Parameters:
5676 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5677 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5678 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5679 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5680 
5681     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5682      for this matrix. This is not desirable.
5683 
5684     Level: developer
5685 
5686 */
5687 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5688 {
5689   PetscErrorCode         ierr;
5690   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5691   Mat_SeqAIJ             *b_oth;
5692   VecScatter             ctx;
5693   MPI_Comm               comm;
5694   const PetscMPIInt      *rprocs,*sprocs;
5695   const PetscInt         *srow,*rstarts,*sstarts;
5696   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5697   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5698   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5699   MPI_Request            *rwaits = NULL,*swaits = NULL;
5700   MPI_Status             rstatus;
5701   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5702 
5703   PetscFunctionBegin;
5704   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5705   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5706 
5707   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5708     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5709   }
5710   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5711   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5712 
5713   if (size == 1) {
5714     startsj_s = NULL;
5715     bufa_ptr  = NULL;
5716     *B_oth    = NULL;
5717     PetscFunctionReturn(0);
5718   }
5719 
5720   ctx = a->Mvctx;
5721   tag = ((PetscObject)ctx)->tag;
5722 
5723   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5724   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5725   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5726   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5727   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5728   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5729   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5730 
5731   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5732   if (scall == MAT_INITIAL_MATRIX) {
5733     /* i-array */
5734     /*---------*/
5735     /*  post receives */
5736     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5737     for (i=0; i<nrecvs; i++) {
5738       rowlen = rvalues + rstarts[i]*rbs;
5739       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5740       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5741     }
5742 
5743     /* pack the outgoing message */
5744     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5745 
5746     sstartsj[0] = 0;
5747     rstartsj[0] = 0;
5748     len         = 0; /* total length of j or a array to be sent */
5749     if (nsends) {
5750       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5751       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5752     }
5753     for (i=0; i<nsends; i++) {
5754       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5755       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5756       for (j=0; j<nrows; j++) {
5757         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5758         for (l=0; l<sbs; l++) {
5759           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5760 
5761           rowlen[j*sbs+l] = ncols;
5762 
5763           len += ncols;
5764           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5765         }
5766         k++;
5767       }
5768       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5769 
5770       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5771     }
5772     /* recvs and sends of i-array are completed */
5773     i = nrecvs;
5774     while (i--) {
5775       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5776     }
5777     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5778     ierr = PetscFree(svalues);CHKERRQ(ierr);
5779 
5780     /* allocate buffers for sending j and a arrays */
5781     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5782     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5783 
5784     /* create i-array of B_oth */
5785     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5786 
5787     b_othi[0] = 0;
5788     len       = 0; /* total length of j or a array to be received */
5789     k         = 0;
5790     for (i=0; i<nrecvs; i++) {
5791       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5792       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5793       for (j=0; j<nrows; j++) {
5794         b_othi[k+1] = b_othi[k] + rowlen[j];
5795         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5796         k++;
5797       }
5798       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5799     }
5800     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5801 
5802     /* allocate space for the j and a arrays of B_oth */
5803     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5804     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5805 
5806     /* j-array */
5807     /*---------*/
5808     /*  post receives of j-array */
5809     for (i=0; i<nrecvs; i++) {
5810       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5811       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5812     }
5813 
5814     /* pack the outgoing message j-array */
5815     if (nsends) k = sstarts[0];
5816     for (i=0; i<nsends; i++) {
5817       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5818       bufJ  = bufj+sstartsj[i];
5819       for (j=0; j<nrows; j++) {
5820         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5821         for (ll=0; ll<sbs; ll++) {
5822           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5823           for (l=0; l<ncols; l++) {
5824             *bufJ++ = cols[l];
5825           }
5826           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5827         }
5828       }
5829       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5830     }
5831 
5832     /* recvs and sends of j-array are completed */
5833     i = nrecvs;
5834     while (i--) {
5835       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5836     }
5837     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5838   } else if (scall == MAT_REUSE_MATRIX) {
5839     sstartsj = *startsj_s;
5840     rstartsj = *startsj_r;
5841     bufa     = *bufa_ptr;
5842     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5843     b_otha   = b_oth->a;
5844   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
5845 
5846   /* a-array */
5847   /*---------*/
5848   /*  post receives of a-array */
5849   for (i=0; i<nrecvs; i++) {
5850     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5851     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5852   }
5853 
5854   /* pack the outgoing message a-array */
5855   if (nsends) k = sstarts[0];
5856   for (i=0; i<nsends; i++) {
5857     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5858     bufA  = bufa+sstartsj[i];
5859     for (j=0; j<nrows; j++) {
5860       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5861       for (ll=0; ll<sbs; ll++) {
5862         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5863         for (l=0; l<ncols; l++) {
5864           *bufA++ = vals[l];
5865         }
5866         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5867       }
5868     }
5869     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5870   }
5871   /* recvs and sends of a-array are completed */
5872   i = nrecvs;
5873   while (i--) {
5874     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5875   }
5876   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5877   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5878 
5879   if (scall == MAT_INITIAL_MATRIX) {
5880     /* put together the new matrix */
5881     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5882 
5883     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5884     /* Since these are PETSc arrays, change flags to free them as necessary. */
5885     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5886     b_oth->free_a  = PETSC_TRUE;
5887     b_oth->free_ij = PETSC_TRUE;
5888     b_oth->nonew   = 0;
5889 
5890     ierr = PetscFree(bufj);CHKERRQ(ierr);
5891     if (!startsj_s || !bufa_ptr) {
5892       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5893       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5894     } else {
5895       *startsj_s = sstartsj;
5896       *startsj_r = rstartsj;
5897       *bufa_ptr  = bufa;
5898     }
5899   }
5900 
5901   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5902   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5903   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5904   PetscFunctionReturn(0);
5905 }
5906 
5907 /*@C
5908   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5909 
5910   Not Collective
5911 
5912   Input Parameter:
5913 . A - The matrix in mpiaij format
5914 
5915   Output Parameters:
5916 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5917 . colmap - A map from global column index to local index into lvec
5918 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5919 
5920   Level: developer
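
  A minimal usage sketch (the returned objects are owned by the matrix and must not be destroyed by the caller):
.vb
     Vec        lvec;
     VecScatter Mvctx;
  #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
  #else
     PetscInt   *colmap;
  #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve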
5921 
5922 @*/
5923 #if defined(PETSC_USE_CTABLE)
5924 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5925 #else
5926 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5927 #endif
5928 {
5929   Mat_MPIAIJ *a;
5930 
5931   PetscFunctionBegin;
5932   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5933   PetscValidPointer(lvec, 2);
5934   PetscValidPointer(colmap, 3);
5935   PetscValidPointer(multScatter, 4);
5936   a = (Mat_MPIAIJ*) A->data;
5937   if (lvec) *lvec = a->lvec;
5938   if (colmap) *colmap = a->colmap;
5939   if (multScatter) *multScatter = a->Mvctx;
5940   PetscFunctionReturn(0);
5941 }
5942 
5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5946 #if defined(PETSC_HAVE_MKL_SPARSE)
5947 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5948 #endif
5949 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5951 #if defined(PETSC_HAVE_ELEMENTAL)
5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5953 #endif
5954 #if defined(PETSC_HAVE_SCALAPACK)
5955 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5956 #endif
5957 #if defined(PETSC_HAVE_HYPRE)
5958 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5959 #endif
5960 #if defined(PETSC_HAVE_CUDA)
5961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5962 #endif
5963 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5965 #endif
5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5967 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5968 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5969 
5970 /*
5971     Computes (B'*A')' since computing A*B directly is untenable
5972 
5973                n                       p                          p
5974         [             ]       [             ]         [                 ]
5975       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5976         [             ]       [             ]         [                 ]
5977 
5978 */
5979 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5980 {
5981   PetscErrorCode ierr;
5982   Mat            At,Bt,Ct;
5983 
5984   PetscFunctionBegin;
5985   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5986   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5987   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5988   ierr = MatDestroy(&At);CHKERRQ(ierr);
5989   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5990   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5991   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5992   PetscFunctionReturn(0);
5993 }
5994 
5995 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5996 {
5997   PetscErrorCode ierr;
5998   PetscBool      cisdense;
5999 
6000   PetscFunctionBegin;
6001   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
6002   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6003   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6004   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6005   if (!cisdense) {
6006     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6007   }
6008   ierr = MatSetUp(C);CHKERRQ(ierr);
6009 
6010   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6011   PetscFunctionReturn(0);
6012 }
6013 
6014 /* ----------------------------------------------------------------*/
6015 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6016 {
6017   Mat_Product *product = C->product;
6018   Mat         A = product->A,B=product->B;
6019 
6020   PetscFunctionBegin;
6021   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6022     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6023 
6024   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6025   C->ops->productsymbolic = MatProductSymbolic_AB;
6026   PetscFunctionReturn(0);
6027 }
6028 
6029 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6030 {
6031   PetscErrorCode ierr;
6032   Mat_Product    *product = C->product;
6033 
6034   PetscFunctionBegin;
6035   if (product->type == MATPRODUCT_AB) {
6036     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6037   }
6038   PetscFunctionReturn(0);
6039 }
6040 /* ----------------------------------------------------------------*/
6041 
6042 /*MC
6043    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6044 
6045    Options Database Keys:
6046 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6047 
6048    Level: beginner
6049 
6050    Notes:
6051     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6052     in this case the values associated with the rows and columns one passes in are set to zero
6053     in the matrix.
6054 
6055     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6056     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
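
    A minimal creation sketch (M and N here stand for the desired global dimensions):
.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve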
6057 
6058 .seealso: MatCreateAIJ()
6059 M*/
6060 
6061 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6062 {
6063   Mat_MPIAIJ     *b;
6064   PetscErrorCode ierr;
6065   PetscMPIInt    size;
6066 
6067   PetscFunctionBegin;
6068   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6069 
6070   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6071   B->data       = (void*)b;
6072   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6073   B->assembled  = PETSC_FALSE;
6074   B->insertmode = NOT_SET_VALUES;
6075   b->size       = size;
6076 
6077   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6078 
6079   /* build cache for off array entries formed */
6080   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6081 
6082   b->donotstash  = PETSC_FALSE;
6083   b->colmap      = NULL;
6084   b->garray      = NULL;
6085   b->roworiented = PETSC_TRUE;
6086 
6087   /* stuff used for matrix vector multiply */
6088   b->lvec  = NULL;
6089   b->Mvctx = NULL;
6090 
6091   /* stuff for MatGetRow() */
6092   b->rowindices   = NULL;
6093   b->rowvalues    = NULL;
6094   b->getrowactive = PETSC_FALSE;
6095 
6096   /* flexible pointer used in CUSP/CUSPARSE classes */
6097   b->spptr = NULL;
6098 
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6109 #if defined(PETSC_HAVE_CUDA)
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6111 #endif
6112 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6114 #endif
6115 #if defined(PETSC_HAVE_MKL_SPARSE)
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6117 #endif
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6120   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6121 #if defined(PETSC_HAVE_ELEMENTAL)
6122   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6123 #endif
6124 #if defined(PETSC_HAVE_SCALAPACK)
6125   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6126 #endif
6127   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6128   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6129 #if defined(PETSC_HAVE_HYPRE)
6130   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6132 #endif
6133   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6134   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6135   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6136   PetscFunctionReturn(0);
6137 }
6138 
6139 /*@C
6140      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6141          and "off-diagonal" parts of the matrix in CSR format.
6142 
6143    Collective
6144 
6145    Input Parameters:
6146 +  comm - MPI communicator
6147 .  m - number of local rows (Cannot be PETSC_DECIDE)
6148 .  n - This value should be the same as the local size used in creating the
6149        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6150        calculated if N is given) For square matrices n is almost always m.
6151 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6152 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6153 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6154 .   j - column indices
6155 .   a - matrix values
6156 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6157 .   oj - column indices
6158 -   oa - matrix values
6159 
6160    Output Parameter:
6161 .   mat - the matrix
6162 
6163    Level: advanced
6164 
6165    Notes:
6166        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6167        must free the arrays once the matrix has been destroyed and not before.
6168 
6169        The i and j indices are 0 based
6170 
6171        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6172 
6173        This sets local rows and cannot be used to set off-processor values.
6174 
6175        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6176        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6177        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6178        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6179        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6180        communication if it is known that only local entries will be set.
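
       A sketch of the recommended alternative (m, n, M, N, d_nz, o_nz, ncols, cols, and vals are placeholders for the application's own data):
.vb
     Mat A;
     ierr = MatCreateAIJ(comm,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       /* ncols, cols, and vals describe row i as assembled by the application */
       ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve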
6181 
6182 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6183           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6184 @*/
6185 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6186 {
6187   PetscErrorCode ierr;
6188   Mat_MPIAIJ     *maij;
6189 
6190   PetscFunctionBegin;
6191   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6192   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6193   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6194   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6195   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6196   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6197   maij = (Mat_MPIAIJ*) (*mat)->data;
6198 
6199   (*mat)->preallocated = PETSC_TRUE;
6200 
6201   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6202   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6203 
6204   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6205   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6206 
6207   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6208   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6209   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6210   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6211 
6212   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6213   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6214   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6215   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6216   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6217   PetscFunctionReturn(0);
6218 }
6219 
6220 /*
6221     Special version for direct calls from Fortran
6222 */
6223 #include <petsc/private/fortranimpl.h>
6224 
6225 /* Change these macros so they can be used in a void function */
6226 #undef CHKERRQ
6227 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6228 #undef SETERRQ2
6229 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6230 #undef SETERRQ3
6231 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6232 #undef SETERRQ
6233 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
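
/* Why these redefinitions are needed (illustrative only): the standard CHKERRQ(ierr) expands to a statement of
   the form

       if (PetscUnlikely(ierr)) return PetscError(...);

   which is not legal in a function returning void, such as the Fortran binding below. CHKERRABORT() instead
   reports the error and calls MPI_Abort(), so it can be used from a void function. */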
6234 
6235 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6236 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6237 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6238 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6239 #else
6240 #endif
6241 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6242 {
6243   Mat            mat  = *mmat;
6244   PetscInt       m    = *mm, n = *mn;
6245   InsertMode     addv = *maddv;
6246   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6247   PetscScalar    value;
6248   PetscErrorCode ierr;
6249 
6250   MatCheckPreallocated(mat,1);
6251   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6252   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6253   {
6254     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6255     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6256     PetscBool roworiented = aij->roworiented;
6257 
6258     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
6259     Mat        A                    = aij->A;
6260     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6261     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6262     MatScalar  *aa                  = a->a;
6263     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6264     Mat        B                    = aij->B;
6265     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6266     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6267     MatScalar  *ba                  = b->a;
6268     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in
6269      * all cases because we cannot use "#if defined" inside a macro. */
6270     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6271 
6272     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6273     PetscInt  nonew = a->nonew;
6274     MatScalar *ap1,*ap2;
6275 
6276     PetscFunctionBegin;
6277     for (i=0; i<m; i++) {
6278       if (im[i] < 0) continue;
6279       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6280       if (im[i] >= rstart && im[i] < rend) {
6281         row      = im[i] - rstart;
6282         lastcol1 = -1;
6283         rp1      = aj + ai[row];
6284         ap1      = aa + ai[row];
6285         rmax1    = aimax[row];
6286         nrow1    = ailen[row];
6287         low1     = 0;
6288         high1    = nrow1;
6289         lastcol2 = -1;
6290         rp2      = bj + bi[row];
6291         ap2      = ba + bi[row];
6292         rmax2    = bimax[row];
6293         nrow2    = bilen[row];
6294         low2     = 0;
6295         high2    = nrow2;
6296 
6297         for (j=0; j<n; j++) {
6298           if (roworiented) value = v[i*n+j];
6299           else value = v[i+j*m];
6300           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6301           if (in[j] >= cstart && in[j] < cend) {
6302             col = in[j] - cstart;
6303             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6304 #if defined(PETSC_HAVE_DEVICE)
6305             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6306 #endif
6307           } else if (in[j] < 0) continue;
6308           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6309             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6310             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6311           } else {
6312             if (mat->was_assembled) {
6313               if (!aij->colmap) {
6314                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6315               }
6316 #if defined(PETSC_USE_CTABLE)
6317               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6318               col--;
6319 #else
6320               col = aij->colmap[in[j]] - 1;
6321 #endif
6322               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6323                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6324                 col  =  in[j];
6325                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6326                 B        = aij->B;
6327                 b        = (Mat_SeqAIJ*)B->data;
6328                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6329                 ba       = b->a;   /* refresh ba before computing ap2: MatDisAssemble_MPIAIJ() gave B a new value array */
6330                 rp2      = bj + bi[row];
6331                 ap2      = ba + bi[row];
6332                 rmax2    = bimax[row];
6333                 nrow2    = bilen[row];
6334                 low2     = 0;
6335                 high2    = nrow2;
6336                 bm       = aij->B->rmap->n;
6337                 inserted = PETSC_FALSE;
6338               }
6339             } else col = in[j];
6340             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6341 #if defined(PETSC_HAVE_DEVICE)
6342             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6343 #endif
6344           }
6345         }
6346       } else if (!aij->donotstash) {
6347         if (roworiented) {
6348           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6349         } else {
6350           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6351         }
6352       }
6353     }
6354   }
6355   PetscFunctionReturnVoid();
6356 }
6357