xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fa213d2f42a2adff90447074182cc202ca0ebb7c)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
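   Example Usage:
   A minimal sketch (the global size N and the preallocation counts used here are placeholders); only the
   preallocation call that matches the communicator size takes effect, so it is safe to call both:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
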
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
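   Example Usage:
   A minimal sketch, assuming the type is selected at run time with -mat_type aijcrl via MatSetFromOptions()
   (N and the preallocation counts are placeholders):
.vb
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetFromOptions(A);
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
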
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
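/*
   Builds an index set of the locally owned rows that contain at least one numerically nonzero entry.
   A first pass counts the locally empty rows of both the diagonal (A) and off-diagonal (B) blocks; if no
   process found an empty row the routine returns with *keptrows = NULL, otherwise a second pass collects
   the global indices of the nonempty rows into an IS.
*/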
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
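/*
   Computes a norm of every global column.  Each process accumulates the contributions of its local rows
   into a work array of full global column length: columns of the diagonal block are offset by cmap->rstart
   and columns of the off-diagonal block are mapped through garray.  The per-process results are combined
   with an MPI reduction (MAX for NORM_INFINITY, SUM otherwise), and for NORM_2 the square root is taken
   at the end.
*/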
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
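
    A minimal usage sketch (gseq and mlocal are placeholder names; gseq is only examined on rank 0 of comm):

       Mat Adist;
       MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);
        ... later, after new numerical values have been placed in gseq on rank 0 ...
       MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);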
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the diagonal and off-diagonal nonzero counts */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the diagonal and off-diagonal nonzero counts */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 has an order N integer array) but is fast to access.
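
  A sketch of how the map is used elsewhere in this file to translate a global column gcol into a local
  column lcol of the off-diagonal block (both variants store values shifted by one so that a lookup of a
  missing column yields lcol = -1):

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif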
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
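/*
   Inserts (or adds, for ADD_VALUES) a single value at local (row,col) of the diagonal block A.  The macro
   searches the current row for the column (bisection followed by a short linear scan), updates the value in
   place if the column is already present, and otherwise either makes room for a new nonzero or skips/errors
   according to the nonew setting; orow/ocol are the original global indices, used only in error messages.
*/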
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
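/* Same as MatSetValues_SeqAIJ_A_Private() above, but operating on the off-diagonal block B */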
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
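/*
   Entries whose rows are owned by this process are inserted directly into the diagonal block (columns in
   [cstart,cend)) or the off-diagonal block (all other columns, translated through colmap/garray) using the
   macros above.  Entries destined for rows owned by other processes are placed in the matrix stash and
   communicated during assembly, unless MAT_NO_OFF_PROC_ENTRIES or MAT_IGNORE_OFF_PROC_ENTRIES was set.
*/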
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so we must
857      also disassemble ourselves, in order that we may reassemble. */
858   /*
859      if the nonzero structure of the submatrix B cannot change then we know that
860      no processor disassembled, thus we can skip this step
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = NULL;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
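/*
   Zeros the given global rows and optionally puts diag on the diagonal of each zeroed row.  The row list is
   first mapped to locally owned rows; if x and b are provided, the right-hand side is fixed as
   b[i] = diag*x[i] on those rows.  The zeroing is delegated to the diagonal and off-diagonal blocks, with a
   slower insertion path when the row and column layouts are not congruent, and finally the processes agree
   on whether the nonzero state changed.
*/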
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
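/*
   Zeros both the rows and the columns listed in rows[] (global indices, possibly owned by other processes).
   A PetscSF delivers each requested row to its owner; the owner zeros the corresponding rows and columns of
   its diagonal block, and a mask vector scattered through Mvctx is used to locate and zero the matching
   columns of the off-diagonal block, updating b from x where requested.
*/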
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
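/*
   y = A*x.  The scatter of the needed off-process entries of x into lvec is started first, the multiply by
   the local diagonal block is overlapped with that communication, and once the scatter completes the
   contribution of the off-diagonal block applied to lvec is added in.
*/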
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
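/*
   MatView_MPIAIJ_Binary() below writes the parallel matrix in the standard PETSc binary format:
   a four-entry header (MAT_FILE_CLASSID, global rows M, global columns N, total number of nonzeros),
   followed by the lengths of all global rows, then the global column indices of every nonzero
   (in increasing order within each row), and finally the corresponding numerical values.

   A minimal usage sketch for producing such a file (illustrative only; mat and ierr are assumed to
   be defined by the caller, and the file name is arbitrary):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)mat),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file can later be read back with MatLoad().
*/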
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Everyone has to participate in drawing the matrix since the graphics waits are
1495        synchronized across all processes that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = NULL;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
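
  /*
     Each of the local sweep variants below follows the same pattern: scatter the current iterate xx
     into the ghost vector lvec, form the modified right-hand side bb1 = bb - B*lvec (the off-diagonal
     coupling is moved to the right-hand side), and then run a sequential SOR sweep with bb1 on the
     diagonal block A, i.e. processor-local SOR (a block Jacobi across processes, SOR within each).
  */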
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed m, the length of the scratch arrays, so insert the row in batches of at most m entries */
1711       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
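
/*
   A minimal usage sketch (illustrative only; A and ierr are assumed to be defined by the caller):
   permuting rows and columns of a parallel AIJ matrix. Each process supplies an index set with the
   global row (respectively column) numbers defining the permutation; perm[] and colperm[] below are
   hypothetical arrays of local lengths m and n holding those numbers (see the MatPermute() man page
   for the exact convention).

     IS  rowp,colp;
     Mat Aperm;
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),m,perm,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,colperm,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/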
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
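
/*
   A minimal usage sketch (illustrative only; A and ierr are assumed to be defined by the caller):
   querying an assembled matrix for its nonzero counts summed over all processes, as computed by the
   MAT_GLOBAL_SUM branch above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/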
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
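
/*
   A minimal usage sketch (illustrative only; A and ierr are assumed to be defined by the caller):
   asserting that this process will not generate entries owned by other processes, which sets the
   donotstash flag handled above and avoids the stash communication during assembly.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/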
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = NULL; pvB = NULL;}
1874   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Merge into increasing global column order, assuming A and B are already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = NULL;
1912       if (v)   *v   = NULL;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
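
/*
   A minimal usage sketch (illustrative only; A and ierr are assumed to be defined by the caller):
   inspecting the locally owned rows one at a time. Only local rows may be requested, as enforced
   above, and each MatGetRow() must be matched by MatRestoreRow() before the next row is fetched.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...  process the ncols entries of this row ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/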
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
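
/*
   A minimal usage sketch (illustrative only; A and ierr are assumed to be defined by the caller):
   the three norms supported above.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);    square root of the sum of squares of all entries
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);            maximum column sum of absolute values
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);     maximum row sum of absolute values
*/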
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2060      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
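
/*
   A minimal usage sketch (illustrative only; A, At and ierr are assumed to be defined by the caller):
   the reuse modes handled above.

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    create At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);      refill an At obtained from a previous MAT_INITIAL_MATRIX call
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);     replace A by its transpose
*/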
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2106   }
2107   /* scale the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* Because of the column compression in the off-process part of the matrix a->B,
2157        the number of columns in a->B and b->B may differ, hence we cannot call
2158        MatCopy() directly on the two parts. If need be, a copy more efficient than
2159        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2160        and then copying the submatrices. */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
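
/*
   Worked example of the counting loop above (illustrative only): if row i of X has global columns
   {0,3,5} and row i of Y has global columns {3,4}, the merge visits 0 (X only), 3 (in both, counted
   once), 4 (Y only) and 5 (X only), so nnz[i] = 4, the size of the union of the two column sets.
*/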
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
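
/*
   A minimal usage sketch (illustrative only; X, Y, alpha and ierr are assumed to be defined by the
   caller): Y = Y + alpha*X. The MatStructure argument selects the branch taken above.

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        fastest, a direct BLAS axpy on the stored values
     ierr = MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      X's nonzero pattern is contained in Y's
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   Y is reallocated to hold the union pattern
*/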
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308   PetscInt       i,*idxb = NULL;
2309   PetscScalar    *va,*vb;
2310   Vec            vtmp;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2314   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2315   if (idx) {
2316     for (i=0; i<A->rmap->n; i++) {
2317       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2318     }
2319   }
2320 
2321   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2322   if (idx) {
2323     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2324   }
2325   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2326   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2327 
2328   for (i=0; i<A->rmap->n; i++) {
2329     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2330       va[i] = vb[i];
2331       if (idx) idx[i] = a->garray[idxb[i]];
2332     }
2333   }
2334 
2335   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2336   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2337   ierr = PetscFree(idxb);CHKERRQ(ierr);
2338   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2345   PetscErrorCode ierr;
2346   PetscInt       i,*idxb = NULL;
2347   PetscScalar    *va,*vb;
2348   Vec            vtmp;
2349 
2350   PetscFunctionBegin;
2351   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2352   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2353   if (idx) {
2354     for (i=0; i<A->rmap->n; i++) {
2355       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2356     }
2357   }
2358 
2359   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2360   if (idx) {
2361     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2362   }
2363   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2364   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2365 
2366   for (i=0; i<A->rmap->n; i++) {
2367     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2368       va[i] = vb[i];
2369       if (idx) idx[i] = a->garray[idxb[i]];
2370     }
2371   }
2372 
2373   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2375   ierr = PetscFree(idxb);CHKERRQ(ierr);
2376   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2381 {
2382   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2383   PetscInt       m = A->rmap->n,n = A->cmap->n;
2384   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2385   PetscInt       *cmap  = mat->garray;
2386   PetscInt       *diagIdx, *offdiagIdx;
2387   Vec            diagV, offdiagV;
2388   PetscScalar    *a, *diagA, *offdiagA, *ba;
2389   PetscInt       r,j,col,ncols,*bi,*bj;
2390   PetscErrorCode ierr;
2391   Mat            B = mat->B;
2392   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2393 
2394   PetscFunctionBegin;
2395   /* When one process holds the entire matrix and the other processes have no entries */
2396   if (A->cmap->N == n) {
2397     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2398     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2399     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2400     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2401     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2402     PetscFunctionReturn(0);
2403   } else if (n == 0) {
2404     if (m) {
2405       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2406       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2407       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2408     }
2409     PetscFunctionReturn(0);
2410   }
2411 
2412   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2413   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2414   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2415   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2416 
2417   /* Get offdiagIdx[] for implicit 0.0 */
2418   ba = b->a;
2419   bi = b->i;
2420   bj = b->j;
2421   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2422   for (r = 0; r < m; r++) {
2423     ncols = bi[r+1] - bi[r];
2424     if (ncols == A->cmap->N - n) { /* Brow is dense */
2425       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2426     } else { /* Brow is sparse, so we already KNOW the row minimum over B is 0.0 or lower */
2427       offdiagA[r] = 0.0;
2428 
2429       /* Find first hole in the cmap */
2430       for (j=0; j<ncols; j++) {
2431         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2432         if (col > j && j < cstart) {
2433           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2434           break;
2435         } else if (col > j + n && j >= cstart) {
2436           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2437           break;
2438         }
2439       }
2440       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2441         /* a hole is outside compressed Bcols */
2442         if (ncols == 0) {
2443           if (cstart) {
2444             offdiagIdx[r] = 0;
2445           } else offdiagIdx[r] = cend;
2446         } else { /* ncols > 0 */
2447           offdiagIdx[r] = cmap[ncols-1] + 1;
2448           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2449         }
2450       }
2451     }
2452 
2453     for (j=0; j<ncols; j++) {
2454       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2455       ba++; bj++;
2456     }
2457   }
2458 
2459   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2460   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2461   for (r = 0; r < m; ++r) {
2462     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2463       a[r]   = diagA[r];
2464       if (idx) idx[r] = cstart + diagIdx[r];
2465     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2466       a[r] = diagA[r];
2467       if (idx) {
2468         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2469           idx[r] = cstart + diagIdx[r];
2470         } else idx[r] = offdiagIdx[r];
2471       }
2472     } else {
2473       a[r]   = offdiagA[r];
2474       if (idx) idx[r] = offdiagIdx[r];
2475     }
2476   }
2477   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2478   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2479   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2480   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2481   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2482   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2487 {
2488   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2489   PetscInt       m = A->rmap->n,n = A->cmap->n;
2490   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2491   PetscInt       *cmap  = mat->garray;
2492   PetscInt       *diagIdx, *offdiagIdx;
2493   Vec            diagV, offdiagV;
2494   PetscScalar    *a, *diagA, *offdiagA, *ba;
2495   PetscInt       r,j,col,ncols,*bi,*bj;
2496   PetscErrorCode ierr;
2497   Mat            B = mat->B;
2498   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2499 
2500   PetscFunctionBegin;
2501   /* When one process holds the entire matrix and the other processes have no entries */
2502   if (A->cmap->N == n) {
2503     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2504     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2505     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2506     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2507     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2508     PetscFunctionReturn(0);
2509   } else if (n == 0) {
2510     if (m) {
2511       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2512       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2513       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2514     }
2515     PetscFunctionReturn(0);
2516   }
2517 
2518   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2519   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2520   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2521   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2522 
2523   /* Get offdiagIdx[] for implicit 0.0 */
2524   ba = b->a;
2525   bi = b->i;
2526   bj = b->j;
2527   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2528   for (r = 0; r < m; r++) {
2529     ncols = bi[r+1] - bi[r];
2530     if (ncols == A->cmap->N - n) { /* Brow is dense */
2531       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2532     } else { /* Brow is sparse, so we already KNOW the row maximum over B is 0.0 or higher */
2533       offdiagA[r] = 0.0;
2534 
2535       /* Find first hole in the cmap */
2536       for (j=0; j<ncols; j++) {
2537         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2538         if (col > j && j < cstart) {
2539           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2540           break;
2541         } else if (col > j + n && j >= cstart) {
2542           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2543           break;
2544         }
2545       }
2546       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2547         /* a hole is outside compressed Bcols */
2548         if (ncols == 0) {
2549           if (cstart) {
2550             offdiagIdx[r] = 0;
2551           } else offdiagIdx[r] = cend;
2552         } else { /* ncols > 0 */
2553           offdiagIdx[r] = cmap[ncols-1] + 1;
2554           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2555         }
2556       }
2557     }
2558 
2559     for (j=0; j<ncols; j++) {
2560       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2561       ba++; bj++;
2562     }
2563   }
2564 
2565   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2566   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2567   for (r = 0; r < m; ++r) {
2568     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2569       a[r] = diagA[r];
2570       if (idx) idx[r] = cstart + diagIdx[r];
2571     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2572       a[r] = diagA[r];
2573       if (idx) {
2574         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2575           idx[r] = cstart + diagIdx[r];
2576         } else idx[r] = offdiagIdx[r];
2577       }
2578     } else {
2579       a[r] = offdiagA[r];
2580       if (idx) idx[r] = offdiagIdx[r];
2581     }
2582   }
2583   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2584   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2585   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2586   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2587   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2588   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2589   PetscFunctionReturn(0);
2590 }
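
/*
   A minimal caller-side sketch of the row-max operation implemented above (an illustration only;
   it assumes A is an assembled MATMPIAIJ matrix, and the names rowmax/maxcols are hypothetical):

     Vec      rowmax;
     PetscInt m,*maxcols;
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);        // vector with A's row layout
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&maxcols);CHKERRQ(ierr);             // one global column index per local row
     ierr = MatGetRowMax(A,rowmax,maxcols);CHKERRQ(ierr);       // rowmax[r] = largest entry of local row r, maxcols[r] = its global column
     ierr = PetscFree(maxcols);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/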
2591 
2592 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2593 {
2594   PetscErrorCode ierr;
2595   Mat            *dummy;
2596 
2597   PetscFunctionBegin;
2598   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2599   *newmat = *dummy;
2600   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2605 {
2606   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2607   PetscErrorCode ierr;
2608 
2609   PetscFunctionBegin;
2610   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2611   A->factorerrortype = a->A->factorerrortype;
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2616 {
2617   PetscErrorCode ierr;
2618   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2619 
2620   PetscFunctionBegin;
2621   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2622   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2623   if (x->assembled) {
2624     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2625   } else {
2626     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2627   }
2628   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2634 {
2635   PetscFunctionBegin;
2636   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2637   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2638   PetscFunctionReturn(0);
2639 }
2640 
2641 /*@
2642    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2643 
2644    Collective on Mat
2645 
2646    Input Parameters:
2647 +    A - the matrix
2648 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is not to use it)
2649 
2650    Level: advanced
2651 
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2654 {
2655   PetscErrorCode       ierr;
2656 
2657   PetscFunctionBegin;
2658   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
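
/*
   A minimal usage sketch (an illustration only; A is assumed to be a MATMPIAIJ matrix and
   nis/is/ov are hypothetical caller variables):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);   // now dispatches to MatIncreaseOverlap_MPIAIJ_Scalable()

   The same switch can be made from the options database with -mat_increase_overlap_scalable,
   handled by MatSetFromOptions_MPIAIJ() below.
*/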
2661 
2662 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2663 {
2664   PetscErrorCode       ierr;
2665   PetscBool            sc = PETSC_FALSE,flg;
2666 
2667   PetscFunctionBegin;
2668   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2669   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2670   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2671   if (flg) {
2672     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2673   }
2674   ierr = PetscOptionsTail();CHKERRQ(ierr);
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2679 {
2680   PetscErrorCode ierr;
2681   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2682   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2683 
2684   PetscFunctionBegin;
2685   if (!Y->preallocated) {
2686     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2687   } else if (!aij->nz) {
2688     PetscInt nonew = aij->nonew;
2689     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2690     aij->nonew = nonew;
2691   }
2692   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2693   PetscFunctionReturn(0);
2694 }
2695 
2696 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2697 {
2698   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2699   PetscErrorCode ierr;
2700 
2701   PetscFunctionBegin;
2702   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2703   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2704   if (d) {
2705     PetscInt rstart;
2706     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2707     *d += rstart;
2708 
2709   }
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2714 {
2715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 /* -------------------------------------------------------------------*/
2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2725                                        MatGetRow_MPIAIJ,
2726                                        MatRestoreRow_MPIAIJ,
2727                                        MatMult_MPIAIJ,
2728                                 /* 4*/ MatMultAdd_MPIAIJ,
2729                                        MatMultTranspose_MPIAIJ,
2730                                        MatMultTransposeAdd_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                 /*10*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        MatSOR_MPIAIJ,
2738                                        MatTranspose_MPIAIJ,
2739                                 /*15*/ MatGetInfo_MPIAIJ,
2740                                        MatEqual_MPIAIJ,
2741                                        MatGetDiagonal_MPIAIJ,
2742                                        MatDiagonalScale_MPIAIJ,
2743                                        MatNorm_MPIAIJ,
2744                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2745                                        MatAssemblyEnd_MPIAIJ,
2746                                        MatSetOption_MPIAIJ,
2747                                        MatZeroEntries_MPIAIJ,
2748                                 /*24*/ MatZeroRows_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                 /*29*/ MatSetUp_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                        MatGetDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                 /*34*/ MatDuplicate_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                 /*39*/ MatAXPY_MPIAIJ,
2764                                        MatCreateSubMatrices_MPIAIJ,
2765                                        MatIncreaseOverlap_MPIAIJ,
2766                                        MatGetValues_MPIAIJ,
2767                                        MatCopy_MPIAIJ,
2768                                 /*44*/ MatGetRowMax_MPIAIJ,
2769                                        MatScale_MPIAIJ,
2770                                        MatShift_MPIAIJ,
2771                                        MatDiagonalSet_MPIAIJ,
2772                                        MatZeroRowsColumns_MPIAIJ,
2773                                 /*49*/ MatSetRandom_MPIAIJ,
2774                                        NULL,
2775                                        NULL,
2776                                        NULL,
2777                                        NULL,
2778                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2779                                        NULL,
2780                                        MatSetUnfactored_MPIAIJ,
2781                                        MatPermute_MPIAIJ,
2782                                        NULL,
2783                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2784                                        MatDestroy_MPIAIJ,
2785                                        MatView_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                 /*64*/ NULL,
2789                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2794                                        MatGetRowMinAbs_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                 /*75*/ MatFDColoringApply_AIJ,
2800                                        MatSetFromOptions_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        MatFindZeroDiagonals_MPIAIJ,
2804                                 /*80*/ NULL,
2805                                        NULL,
2806                                        NULL,
2807                                 /*83*/ MatLoad_MPIAIJ,
2808                                        MatIsSymmetric_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                 /*89*/ NULL,
2814                                        NULL,
2815                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        MatBindToCPU_MPIAIJ,
2823                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatConjugate_MPIAIJ,
2827                                        NULL,
2828                                 /*104*/MatSetValuesRow_MPIAIJ,
2829                                        MatRealPart_MPIAIJ,
2830                                        MatImaginaryPart_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                 /*109*/NULL,
2834                                        NULL,
2835                                        MatGetRowMin_MPIAIJ,
2836                                        NULL,
2837                                        MatMissingDiagonal_MPIAIJ,
2838                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2839                                        NULL,
2840                                        MatGetGhosts_MPIAIJ,
2841                                        NULL,
2842                                        NULL,
2843                                 /*119*/NULL,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        MatGetMultiProcBlock_MPIAIJ,
2848                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2849                                        MatGetColumnNorms_MPIAIJ,
2850                                        MatInvertBlockDiagonal_MPIAIJ,
2851                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2852                                        MatCreateSubMatricesMPI_MPIAIJ,
2853                                 /*129*/NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2857                                        NULL,
2858                                 /*134*/NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                 /*139*/MatSetBlockSizes_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        MatFDColoringSetUp_MPIXAIJ,
2867                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2868                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2869                                 /*145*/NULL,
2870                                        NULL,
2871                                        NULL
2872 };
2873 
2874 /* ----------------------------------------------------------------------------------------*/
2875 
2876 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2877 {
2878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2879   PetscErrorCode ierr;
2880 
2881   PetscFunctionBegin;
2882   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2883   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2888 {
2889   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2890   PetscErrorCode ierr;
2891 
2892   PetscFunctionBegin;
2893   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2894   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2899 {
2900   Mat_MPIAIJ     *b;
2901   PetscErrorCode ierr;
2902   PetscMPIInt    size;
2903 
2904   PetscFunctionBegin;
2905   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2906   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2907   b = (Mat_MPIAIJ*)B->data;
2908 
2909 #if defined(PETSC_USE_CTABLE)
2910   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2911 #else
2912   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2913 #endif
2914   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2915   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2916   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2917 
2918   /* Because B may have been resized we simply destroy it and create a new one each time */
2919   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2920   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2921   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2922   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2923   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2924   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2925   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2926 
2927   if (!B->preallocated) {
2928     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2929     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2930     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2931     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2932     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2933   }
2934 
2935   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2936   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2937   B->preallocated  = PETSC_TRUE;
2938   B->was_assembled = PETSC_FALSE;
2939   B->assembled     = PETSC_FALSE;
2940   PetscFunctionReturn(0);
2941 }
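
/*
   A minimal caller-side preallocation sketch (an illustration only; M and N are hypothetical
   global sizes and the nonzero counts are made up):

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,5,NULL,2,NULL);CHKERRQ(ierr);  // at most 5 nonzeros per row in the diagonal
                                                                       // block and 2 in the off-diagonal block
*/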
2942 
2943 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2944 {
2945   Mat_MPIAIJ     *b;
2946   PetscErrorCode ierr;
2947 
2948   PetscFunctionBegin;
2949   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2950   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2951   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2952   b = (Mat_MPIAIJ*)B->data;
2953 
2954 #if defined(PETSC_USE_CTABLE)
2955   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2956 #else
2957   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2958 #endif
2959   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2960   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2961   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2962 
2963   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2964   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2965   B->preallocated  = PETSC_TRUE;
2966   B->was_assembled = PETSC_FALSE;
2967   B->assembled = PETSC_FALSE;
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2972 {
2973   Mat            mat;
2974   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2975   PetscErrorCode ierr;
2976 
2977   PetscFunctionBegin;
2978   *newmat = NULL;
2979   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2980   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2981   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2982   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2983   a       = (Mat_MPIAIJ*)mat->data;
2984 
2985   mat->factortype   = matin->factortype;
2986   mat->assembled    = matin->assembled;
2987   mat->insertmode   = NOT_SET_VALUES;
2988   mat->preallocated = matin->preallocated;
2989 
2990   a->size         = oldmat->size;
2991   a->rank         = oldmat->rank;
2992   a->donotstash   = oldmat->donotstash;
2993   a->roworiented  = oldmat->roworiented;
2994   a->rowindices   = NULL;
2995   a->rowvalues    = NULL;
2996   a->getrowactive = PETSC_FALSE;
2997 
2998   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2999   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3000 
3001   if (oldmat->colmap) {
3002 #if defined(PETSC_USE_CTABLE)
3003     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3004 #else
3005     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3006     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3007     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3008 #endif
3009   } else a->colmap = NULL;
3010   if (oldmat->garray) {
3011     PetscInt len;
3012     len  = oldmat->B->cmap->n;
3013     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3014     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3015     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3016   } else a->garray = NULL;
3017 
3018   /* MatDuplicate() may be called with a non-assembled matrix;
3019      in fact, MatDuplicate() only requires the matrix to be preallocated.
3020      This can happen, for example, inside DMCreateMatrix_Shell() */
3021   if (oldmat->lvec) {
3022     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3023     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3024   }
3025   if (oldmat->Mvctx) {
3026     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3027     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3028   }
3029   if (oldmat->Mvctx_mpi1) {
3030     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3031     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3032   }
3033 
3034   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3035   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3036   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3037   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3038   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3039   *newmat = mat;
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3044 {
3045   PetscBool      isbinary, ishdf5;
3046   PetscErrorCode ierr;
3047 
3048   PetscFunctionBegin;
3049   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3050   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3051   /* force binary viewer to load .info file if it has not yet done so */
3052   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3053   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3054   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3055   if (isbinary) {
3056     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3057   } else if (ishdf5) {
3058 #if defined(PETSC_HAVE_HDF5)
3059     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3060 #else
3061     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3062 #endif
3063   } else {
3064     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3065   }
3066   PetscFunctionReturn(0);
3067 }
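
/*
   A minimal load sketch for the binary path above (an illustration only; "matrix.dat" is a
   hypothetical file previously written with MatView() and a binary viewer):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/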
3068 
3069 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3070 {
3071   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3072   PetscInt       *rowidxs,*colidxs;
3073   PetscScalar    *matvals;
3074   PetscErrorCode ierr;
3075 
3076   PetscFunctionBegin;
3077   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3078 
3079   /* read in matrix header */
3080   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3081   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3082   M  = header[1]; N = header[2]; nz = header[3];
3083   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3084   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3085   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3086 
3087   /* set block sizes from the viewer's .info file */
3088   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3089   /* set global sizes if not set already */
3090   if (mat->rmap->N < 0) mat->rmap->N = M;
3091   if (mat->cmap->N < 0) mat->cmap->N = N;
3092   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3093   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3094 
3095   /* check if the matrix sizes are correct */
3096   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3097   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3098 
3099   /* read in row lengths and build row indices */
3100   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3101   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3102   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
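  /* convert the per-row lengths just read into CSR row offsets via a running sum: rowidxs[i+1] = rowidxs[i] + (length of row i) */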
3103   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3104   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3105   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3106   /* read in column indices and matrix values */
3107   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3108   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3109   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3110   /* store matrix indices and values */
3111   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3112   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3113   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3114   PetscFunctionReturn(0);
3115 }
3116 
3117 /* Not scalable because of ISAllGather() unless getting all columns. */
3118 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3119 {
3120   PetscErrorCode ierr;
3121   IS             iscol_local;
3122   PetscBool      isstride;
3123   PetscMPIInt    lisstride=0,gisstride;
3124 
3125   PetscFunctionBegin;
3126   /* check if we are grabbing all columns */
3127   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3128 
3129   if (isstride) {
3130     PetscInt  start,len,mstart,mlen;
3131     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3132     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3133     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3134     if (mstart == start && mlen-mstart == len) lisstride = 1;
3135   }
3136 
3137   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3138   if (gisstride) {
3139     PetscInt N;
3140     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3141     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3142     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3143     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3144   } else {
3145     PetscInt cbs;
3146     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3147     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3148     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3149   }
3150 
3151   *isseq = iscol_local;
3152   PetscFunctionReturn(0);
3153 }
3154 
3155 /*
3156  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and a sequential iscol_local of global length
3157  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3158 
3159  Input Parameters:
3160    mat - matrix
3161    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3162            i.e., mat->rstart <= isrow[i] < mat->rend
3163    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3164            i.e., mat->cstart <= iscol[i] < mat->cend
3165  Output Parameters:
3166    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3167    iscol_o - sequential column index set for retrieving mat->B
3168    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3169  */
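/*
   For illustration only (a made-up layout): suppose the off-diagonal block B on this process has
   local columns {0,1,2} whose global columns are {4,9,17}, and iscol selects global columns 9 and 17
   among its entries. Then iscol_o = {1,2} (the local B columns to keep) and garray = {p,q}, where p
   and q are the positions of columns 9 and 17 within the concatenated parallel iscol.
*/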
3170 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3171 {
3172   PetscErrorCode ierr;
3173   Vec            x,cmap;
3174   const PetscInt *is_idx;
3175   PetscScalar    *xarray,*cmaparray;
3176   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3177   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3178   Mat            B=a->B;
3179   Vec            lvec=a->lvec,lcmap;
3180   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3181   MPI_Comm       comm;
3182   VecScatter     Mvctx=a->Mvctx;
3183 
3184   PetscFunctionBegin;
3185   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3186   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3187 
3188   /* (1) iscol selects a subset of mat's columns; pad it with '-1.' to form a full-length vector x */
3189   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3190   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3191   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3192   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3193 
3194   /* Get start indices */
3195   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3196   isstart -= ncols;
3197   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3198 
3199   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3200   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3201   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3202   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3203   for (i=0; i<ncols; i++) {
3204     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3205     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3206     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3207   }
3208   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3209   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3210   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3211 
3212   /* Get iscol_d */
3213   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3214   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3215   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3216 
3217   /* Get isrow_d */
3218   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3219   rstart = mat->rmap->rstart;
3220   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3221   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3222   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3223   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3224 
3225   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3226   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3227   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3228 
3229   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3230   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3231   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3232 
3233   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3234 
3235   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3237 
3238   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3239   /* off-process column indices */
3240   count = 0;
3241   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3242   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3243 
3244   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3245   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3246   for (i=0; i<Bn; i++) {
3247     if (PetscRealPart(xarray[i]) > -1.0) {
3248       idx[count]     = i;                   /* local column index in off-diagonal part B */
3249       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3250       count++;
3251     }
3252   }
3253   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3254   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3255 
3256   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3257   /* cannot ensure iscol_o has same blocksize as iscol! */
3258 
3259   ierr = PetscFree(idx);CHKERRQ(ierr);
3260   *garray = cmap1;
3261 
3262   ierr = VecDestroy(&x);CHKERRQ(ierr);
3263   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3264   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3269 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3270 {
3271   PetscErrorCode ierr;
3272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3273   Mat            M = NULL;
3274   MPI_Comm       comm;
3275   IS             iscol_d,isrow_d,iscol_o;
3276   Mat            Asub = NULL,Bsub = NULL;
3277   PetscInt       n;
3278 
3279   PetscFunctionBegin;
3280   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3281 
3282   if (call == MAT_REUSE_MATRIX) {
3283     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3284     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3285     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3286 
3287     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3288     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3289 
3290     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3291     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3292 
3293     /* Update diagonal and off-diagonal portions of submat */
3294     asub = (Mat_MPIAIJ*)(*submat)->data;
3295     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3296     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3297     if (n) {
3298       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3299     }
3300     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302 
3303   } else { /* call == MAT_INITIAL_MATRIX */
3304     const PetscInt *garray;
3305     PetscInt        BsubN;
3306 
3307     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3308     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3309 
3310     /* Create local submatrices Asub and Bsub */
3311     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3312     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3313 
3314     /* Create submatrix M */
3315     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3316 
3317     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3318     asub = (Mat_MPIAIJ*)M->data;
3319 
3320     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3321     n = asub->B->cmap->N;
3322     if (BsubN > n) {
3323       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3324       const PetscInt *idx;
3325       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3326       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3327 
3328       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3329       j = 0;
3330       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3331       for (i=0; i<n; i++) {
3332         if (j >= BsubN) break;
3333         while (subgarray[i] > garray[j]) j++;
3334 
3335         if (subgarray[i] == garray[j]) {
3336           idx_new[i] = idx[j++];
3337         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3338       }
3339       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3340 
3341       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3342       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3343 
3344     } else if (BsubN < n) {
3345       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3346     }
3347 
3348     ierr = PetscFree(garray);CHKERRQ(ierr);
3349     *submat = M;
3350 
3351     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3352     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3353     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3354 
3355     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3356     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3357 
3358     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3359     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3360   }
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3365 {
3366   PetscErrorCode ierr;
3367   IS             iscol_local=NULL,isrow_d;
3368   PetscInt       csize;
3369   PetscInt       n,i,j,start,end;
3370   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3371   MPI_Comm       comm;
3372 
3373   PetscFunctionBegin;
3374   /* If isrow has the same processor distribution as mat,
3375      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3376   if (call == MAT_REUSE_MATRIX) {
3377     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3378     if (isrow_d) {
3379       sameRowDist  = PETSC_TRUE;
3380       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3381     } else {
3382       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3383       if (iscol_local) {
3384         sameRowDist  = PETSC_TRUE;
3385         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3386       }
3387     }
3388   } else {
3389     /* Check if isrow has same processor distribution as mat */
3390     sameDist[0] = PETSC_FALSE;
3391     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3392     if (!n) {
3393       sameDist[0] = PETSC_TRUE;
3394     } else {
3395       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3396       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3397       if (i >= start && j < end) {
3398         sameDist[0] = PETSC_TRUE;
3399       }
3400     }
3401 
3402     /* Check if iscol has same processor distribution as mat */
3403     sameDist[1] = PETSC_FALSE;
3404     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3405     if (!n) {
3406       sameDist[1] = PETSC_TRUE;
3407     } else {
3408       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3409       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3410       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3411     }
3412 
3413     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3414     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3415     sameRowDist = tsameDist[0];
3416   }
3417 
3418   if (sameRowDist) {
3419     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3420       /* isrow and iscol have same processor distribution as mat */
3421       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3422       PetscFunctionReturn(0);
3423     } else { /* sameRowDist */
3424       /* isrow has same processor distribution as mat */
3425       if (call == MAT_INITIAL_MATRIX) {
3426         PetscBool sorted;
3427         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3428         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3429         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3430         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3431 
3432         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3433         if (sorted) {
3434           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3435           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3436           PetscFunctionReturn(0);
3437         }
3438       } else { /* call == MAT_REUSE_MATRIX */
3439         IS    iscol_sub;
3440         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3441         if (iscol_sub) {
3442           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3443           PetscFunctionReturn(0);
3444         }
3445       }
3446     }
3447   }
3448 
3449   /* General case: iscol -> iscol_local which has global size of iscol */
3450   if (call == MAT_REUSE_MATRIX) {
3451     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3452     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3453   } else {
3454     if (!iscol_local) {
3455       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3456     }
3457   }
3458 
3459   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3460   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3461 
3462   if (call == MAT_INITIAL_MATRIX) {
3463     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3464     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3465   }
3466   PetscFunctionReturn(0);
3467 }
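
/*
   A minimal caller-side sketch for the dispatcher above (an illustration only; isrow and iscol are
   hypothetical parallel index sets built by the caller, e.g. with ISCreateStride()):

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     // ... later, after the values (but not the nonzero pattern) of A have changed ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/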
3468 
3469 /*@C
3470      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3471          and "off-diagonal" parts of the matrix in CSR format.
3472 
3473    Collective
3474 
3475    Input Parameters:
3476 +  comm - MPI communicator
3477 .  A - "diagonal" portion of matrix
3478 .  B - "off-diagonal" portion of the matrix; it may have empty columns and will be destroyed by this routine
3479 -  garray - global column indices of the columns of B
3480 
3481    Output Parameter:
3482 .   mat - the matrix, with input A as its local diagonal matrix
3483    Level: advanced
3484 
3485    Notes:
3486        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3487        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3488 
3489 .seealso: MatCreateMPIAIJWithSplitArrays()
3490 @*/
3491 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3492 {
3493   PetscErrorCode ierr;
3494   Mat_MPIAIJ     *maij;
3495   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3496   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3497   PetscScalar    *oa=b->a;
3498   Mat            Bnew;
3499   PetscInt       m,n,N;
3500 
3501   PetscFunctionBegin;
3502   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3503   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3504   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3505   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3506   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3507   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3508 
3509   /* Get global columns of mat */
3510   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3511 
3512   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3513   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3514   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3515   maij = (Mat_MPIAIJ*)(*mat)->data;
3516 
3517   (*mat)->preallocated = PETSC_TRUE;
3518 
3519   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3520   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3521 
3522   /* Set A as diagonal portion of *mat */
3523   maij->A = A;
3524 
3525   nz = oi[m];
3526   for (i=0; i<nz; i++) {
3527     col   = oj[i];
3528     oj[i] = garray[col];
3529   }
3530 
3531    /* Set Bnew as off-diagonal portion of *mat */
3532   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3533   bnew        = (Mat_SeqAIJ*)Bnew->data;
3534   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3535   maij->B     = Bnew;
3536 
3537   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3538 
3539   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3540   b->free_a       = PETSC_FALSE;
3541   b->free_ij      = PETSC_FALSE;
3542   ierr = MatDestroy(&B);CHKERRQ(ierr);
3543 
3544   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3545   bnew->free_a       = PETSC_TRUE;
3546   bnew->free_ij      = PETSC_TRUE;
3547 
3548   /* condense columns of maij->B */
3549   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3550   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3551   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3552   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3553   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3554   PetscFunctionReturn(0);
3555 }
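
/*
   A minimal call sketch for the routine above (an illustration only; Asub and Bsub are hypothetical
   local SeqAIJ pieces with the same number of rows, and garray[] maps each Bsub column to its global
   column). As documented above, Asub becomes part of M and Bsub is destroyed, so neither may be used
   afterwards:

     Mat M;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,garray,&M);CHKERRQ(ierr);
*/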
3556 
3557 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3558 
3559 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3560 {
3561   PetscErrorCode ierr;
3562   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3563   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3564   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3565   Mat            M,Msub,B=a->B;
3566   MatScalar      *aa;
3567   Mat_SeqAIJ     *aij;
3568   PetscInt       *garray = a->garray,*colsub,Ncols;
3569   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3570   IS             iscol_sub,iscmap;
3571   const PetscInt *is_idx,*cmap;
3572   PetscBool      allcolumns=PETSC_FALSE;
3573   MPI_Comm       comm;
3574 
3575   PetscFunctionBegin;
3576   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3577 
3578   if (call == MAT_REUSE_MATRIX) {
3579     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3580     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3581     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3582 
3583     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3584     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3585 
3586     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3587     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3588 
3589     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3590 
3591   } else { /* call == MAT_INITIAL_MATRIX */
3592     PetscBool flg;
3593 
3594     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3595     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3596 
3597     /* (1) iscol -> nonscalable iscol_local */
3598     /* Check for special case: each processor gets entire matrix columns */
3599     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3600     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3601     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3602     if (allcolumns) {
3603       iscol_sub = iscol_local;
3604       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3605       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3606 
3607     } else {
3608       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3609       PetscInt *idx,*cmap1,k;
3610       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3611       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3612       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3613       count = 0;
3614       k     = 0;
3615       for (i=0; i<Ncols; i++) {
3616         j = is_idx[i];
3617         if (j >= cstart && j < cend) {
3618           /* diagonal part of mat */
3619           idx[count]     = j;
3620           cmap1[count++] = i; /* column index in submat */
3621         } else if (Bn) {
3622           /* off-diagonal part of mat */
3623           if (j == garray[k]) {
3624             idx[count]     = j;
3625             cmap1[count++] = i;  /* column index in submat */
3626           } else if (j > garray[k]) {
3627             while (j > garray[k] && k < Bn-1) k++;
3628             if (j == garray[k]) {
3629               idx[count]     = j;
3630               cmap1[count++] = i; /* column index in submat */
3631             }
3632           }
3633         }
3634       }
3635       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3636 
3637       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3638       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3639       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3640 
3641       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3642     }
3643 
3644     /* (3) Create sequential Msub */
3645     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3646   }
3647 
3648   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3649   aij  = (Mat_SeqAIJ*)(Msub)->data;
3650   ii   = aij->i;
3651   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3652 
3653   /*
3654       m - number of local rows
3655       Ncols - number of columns (same on all processors)
3656       rstart - first row in new global matrix generated
3657   */
3658   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3659 
3660   if (call == MAT_INITIAL_MATRIX) {
3661     /* (4) Create parallel newmat */
3662     PetscMPIInt    rank,size;
3663     PetscInt       csize;
3664 
3665     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3666     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3667 
3668     /*
3669         Determine the number of non-zeros in the diagonal and off-diagonal
3670         portions of the matrix in order to do correct preallocation
3671     */
3672 
3673     /* first get start and end of "diagonal" columns */
3674     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3675     if (csize == PETSC_DECIDE) {
3676       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3677       if (mglobal == Ncols) { /* square matrix */
3678         nlocal = m;
3679       } else {
3680         nlocal = Ncols/size + ((Ncols % size) > rank);
3681       }
3682     } else {
3683       nlocal = csize;
3684     }
3685     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3686     rstart = rend - nlocal;
3687     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3688 
3689     /* next, compute all the lengths */
3690     jj    = aij->j;
3691     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3692     olens = dlens + m;
3693     for (i=0; i<m; i++) {
3694       jend = ii[i+1] - ii[i];
3695       olen = 0;
3696       dlen = 0;
3697       for (j=0; j<jend; j++) {
3698         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3699         else dlen++;
3700         jj++;
3701       }
3702       olens[i] = olen;
3703       dlens[i] = dlen;
3704     }
3705 
3706     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3707     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3708 
3709     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3710     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3711     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3712     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3713     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3714     ierr = PetscFree(dlens);CHKERRQ(ierr);
3715 
3716   } else { /* call == MAT_REUSE_MATRIX */
3717     M    = *newmat;
3718     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3719     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3720     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3721     /*
3722          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3723        rather than the slower MatSetValues().
3724     */
3725     M->was_assembled = PETSC_TRUE;
3726     M->assembled     = PETSC_FALSE;
3727   }
3728 
3729   /* (5) Set values of Msub to *newmat */
3730   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3731   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3732 
3733   jj   = aij->j;
3734   aa   = aij->a;
3735   for (i=0; i<m; i++) {
3736     row = rstart + i;
3737     nz  = ii[i+1] - ii[i];
3738     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3739     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3740     jj += nz; aa += nz;
3741   }
3742   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3743 
3744   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3745   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3746 
3747   ierr = PetscFree(colsub);CHKERRQ(ierr);
3748 
3749   /* save Msub, iscol_sub and iscmap used in processor for next request */
3750   if (call ==  MAT_INITIAL_MATRIX) {
3751     *newmat = M;
3752     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3753     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3754 
3755     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3756     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3757 
3758     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3759     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3760 
3761     if (iscol_local) {
3762       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3763       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3764     }
3765   }
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 /*
3770     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each process,
3771   then the final result by concatenating the local matrices.
3772   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3773 
3774   Note: This requires a sequential iscol with all indices.
3775 */
3776 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3777 {
3778   PetscErrorCode ierr;
3779   PetscMPIInt    rank,size;
3780   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3781   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3782   Mat            M,Mreuse;
3783   MatScalar      *aa,*vwork;
3784   MPI_Comm       comm;
3785   Mat_SeqAIJ     *aij;
3786   PetscBool      colflag,allcolumns=PETSC_FALSE;
3787 
3788   PetscFunctionBegin;
3789   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3790   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3791   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3792 
3793   /* Check for special case: each processor gets entire matrix columns */
3794   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3795   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3796   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3797   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3798 
3799   if (call ==  MAT_REUSE_MATRIX) {
3800     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3801     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3802     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3803   } else {
3804     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3805   }
3806 
3807   /*
3808       m - number of local rows
3809       n - number of columns (same on all processors)
3810       rstart - first row in new global matrix generated
3811   */
3812   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3813   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3814   if (call == MAT_INITIAL_MATRIX) {
3815     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3816     ii  = aij->i;
3817     jj  = aij->j;
3818 
3819     /*
3820         Determine the number of non-zeros in the diagonal and off-diagonal
3821         portions of the matrix in order to do correct preallocation
3822     */
3823 
3824     /* first get start and end of "diagonal" columns */
3825     if (csize == PETSC_DECIDE) {
3826       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3827       if (mglobal == n) { /* square matrix */
3828         nlocal = m;
3829       } else {
3830         nlocal = n/size + ((n % size) > rank);
3831       }
3832     } else {
3833       nlocal = csize;
3834     }
3835     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3836     rstart = rend - nlocal;
3837     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3838 
3839     /* next, compute all the lengths */
3840     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3841     olens = dlens + m;
3842     for (i=0; i<m; i++) {
3843       jend = ii[i+1] - ii[i];
3844       olen = 0;
3845       dlen = 0;
3846       for (j=0; j<jend; j++) {
3847         if (*jj < rstart || *jj >= rend) olen++;
3848         else dlen++;
3849         jj++;
3850       }
3851       olens[i] = olen;
3852       dlens[i] = dlen;
3853     }
3854     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3855     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3856     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3857     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3858     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3859     ierr = PetscFree(dlens);CHKERRQ(ierr);
3860   } else {
3861     PetscInt ml,nl;
3862 
3863     M    = *newmat;
3864     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3865     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3866     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3867     /*
3868          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3869        rather than the slower MatSetValues().
3870     */
3871     M->was_assembled = PETSC_TRUE;
3872     M->assembled     = PETSC_FALSE;
3873   }
3874   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3875   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3876   ii   = aij->i;
3877   jj   = aij->j;
3878   aa   = aij->a;
3879   for (i=0; i<m; i++) {
3880     row   = rstart + i;
3881     nz    = ii[i+1] - ii[i];
3882     cwork = jj;     jj += nz;
3883     vwork = aa;     aa += nz;
3884     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3885   }
3886 
3887   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3888   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3889   *newmat = M;
3890 
3891   /* save submatrix used in processor for next request */
3892   if (call ==  MAT_INITIAL_MATRIX) {
3893     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3894     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3895   }
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3900 {
3901   PetscInt       m,cstart, cend,j,nnz,i,d;
3902   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3903   const PetscInt *JJ;
3904   PetscErrorCode ierr;
3905   PetscBool      nooffprocentries;
3906 
3907   PetscFunctionBegin;
3908   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3909 
3910   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3911   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3912   m      = B->rmap->n;
3913   cstart = B->cmap->rstart;
3914   cend   = B->cmap->rend;
3915   rstart = B->rmap->rstart;
3916 
3917   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3918 
3919   if (PetscDefined(USE_DEBUG)) {
3920     for (i=0; i<m; i++) {
3921       nnz = Ii[i+1]- Ii[i];
3922       JJ  = J + Ii[i];
3923       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3924       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3925       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3926     }
3927   }
3928 
3929   for (i=0; i<m; i++) {
3930     nnz     = Ii[i+1]- Ii[i];
3931     JJ      = J + Ii[i];
3932     nnz_max = PetscMax(nnz_max,nnz);
3933     d       = 0;
3934     for (j=0; j<nnz; j++) {
3935       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3936     }
3937     d_nnz[i] = d;
3938     o_nnz[i] = nnz - d;
3939   }
3940   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3941   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3942 
3943   for (i=0; i<m; i++) {
3944     ii   = i + rstart;
3945     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3946   }
3947   nooffprocentries    = B->nooffprocentries;
3948   B->nooffprocentries = PETSC_TRUE;
3949   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3950   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3951   B->nooffprocentries = nooffprocentries;
3952 
3953   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3954   PetscFunctionReturn(0);
3955 }
3956 
3957 /*@
3958    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3959    (the default parallel PETSc format).
3960 
3961    Collective
3962 
3963    Input Parameters:
3964 +  B - the matrix
3965 .  i - the indices into j for the start of each local row (starts with zero)
3966 .  j - the column indices for each local row (starts with zero)
3967 -  v - optional values in the matrix
3968 
3969    Level: developer
3970 
3971    Notes:
3972        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3973      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3974      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3975 
3976        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3977 
3978        The format used for the sparse matrix input is equivalent to a
3979     row-major ordering, i.e., for the following matrix the input data expected is
3980     as shown:
3981 
3982 $        1 0 0
3983 $        2 0 3     P0
3984 $       -------
3985 $        4 5 6     P1
3986 $
3987 $     Process0 [P0]: rows_owned=[0,1]
3988 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3989 $        j =  {0,0,2}  [size = 3]
3990 $        v =  {1,2,3}  [size = 3]
3991 $
3992 $     Process1 [P1]: rows_owned=[2]
3993 $        i =  {0,3}    [size = nrow+1  = 1+1]
3994 $        j =  {0,1,2}  [size = 3]
3995 $        v =  {4,5,6}  [size = 3]
3996 
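      A minimal call sequence, sketched (B is assumed to already exist; its local/global sizes and the MATMPIAIJ type must be set before this routine is called):
.vb
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
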
3997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3998           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3999 @*/
4000 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4001 {
4002   PetscErrorCode ierr;
4003 
4004   PetscFunctionBegin;
4005   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4006   PetscFunctionReturn(0);
4007 }
4008 
4009 /*@C
4010    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4011    (the default parallel PETSc format).  For good matrix assembly performance
4012    the user should preallocate the matrix storage by setting the parameters
4013    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4014    performance can be increased by more than a factor of 50.
4015 
4016    Collective
4017 
4018    Input Parameters:
4019 +  B - the matrix
4020 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4021            (same value is used for all local rows)
4022 .  d_nnz - array containing the number of nonzeros in the various rows of the
4023            DIAGONAL portion of the local submatrix (possibly different for each row)
4024            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4025            The size of this array is equal to the number of local rows, i.e 'm'.
4026            For matrices that will be factored, you must leave room for (and set)
4027            the diagonal entry even if it is zero.
4028 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4029            submatrix (same value is used for all local rows).
4030 -  o_nnz - array containing the number of nonzeros in the various rows of the
4031            OFF-DIAGONAL portion of the local submatrix (possibly different for
4032            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4033            structure. The size of this array is equal to the number
4034            of local rows, i.e 'm'.
4035 
4036    If the *_nnz parameter is given then the *_nz parameter is ignored
4037 
4038    The AIJ format (also called the Yale sparse matrix format or
4039    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4040    storage.  The stored row and column indices begin with zero.
4041    See Users-Manual: ch_mat for details.
4042 
4043    The parallel matrix is partitioned such that the first m0 rows belong to
4044    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4045    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4046 
4047    The DIAGONAL portion of the local submatrix of a processor can be defined
4048    as the submatrix which is obtained by extracting the part corresponding to
4049    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4050    first row that belongs to the processor, r2 is the last row belonging to
4051    this processor, and c1-c2 is the range of indices of the local part of a
4052    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4053    common case of a square matrix, the row and column ranges are the same and
4054    the DIAGONAL part is also square. The remaining portion of the local
4055    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4056 
4057    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4058 
4059    You can call MatGetInfo() to get information on how effective the preallocation was;
4060    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4061    You can also run with the option -info and look for messages with the string
4062    malloc in them to see if additional memory allocation was needed.
4063 
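   A sketch of such a check; each process prints its own local counts (A is the assembled matrix):
.vb
     MatInfo info;
     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g allocated %g used %g unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve
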
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4103    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4104    matrix, and [DF] as another SeqAIJ matrix.
4105 
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4109    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4110    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : dnz = 2, o_nz = 2
4114      proc1 : dnz = 3, o_nz = 2
4115      proc2 : dnz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4119    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is the sum of all the above values, i.e., 34, and
4131    hence pre-allocation is perfect.
4132 
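   For instance, the preallocation call on proc1 in the example above would be (a sketch; B is the matrix being preallocated):
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
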
4133    Level: intermediate
4134 
4135 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4136           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4137 @*/
4138 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4139 {
4140   PetscErrorCode ierr;
4141 
4142   PetscFunctionBegin;
4143   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4144   PetscValidType(B,1);
4145   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4146   PetscFunctionReturn(0);
4147 }
4148 
4149 /*@
4150      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4151          CSR format.
4152 
4153    Collective
4154 
4155    Input Parameters:
4156 +  comm - MPI communicator
4157 .  m - number of local rows (Cannot be PETSC_DECIDE)
4158 .  n - This value should be the same as the local size used in creating the
4159        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4160        it calculated if N is given). For square matrices n is almost always m.
4161 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4162 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4163 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4164 .   j - column indices
4165 -   a - matrix values
4166 
4167    Output Parameter:
4168 .   mat - the matrix
4169 
4170    Level: intermediate
4171 
4172    Notes:
4173        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4174      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4175      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4176 
4177        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4178 
4179        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4180 
4181        The format used for the sparse matrix input is equivalent to a
4182     row-major ordering, i.e., for the following matrix the input data expected is
4183     as shown:
4184 
4185 $        1 0 0
4186 $        2 0 3     P0
4187 $       -------
4188 $        4 5 6     P1
4189 $
4190 $     Process0 [P0]: rows_owned=[0,1]
4191 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4192 $        j =  {0,0,2}  [size = 3]
4193 $        v =  {1,2,3}  [size = 3]
4194 $
4195 $     Process1 [P1]: rows_owned=[2]
4196 $        i =  {0,3}    [size = nrow+1  = 1+1]
4197 $        j =  {0,1,2}  [size = 3]
4198 $        v =  {4,5,6}  [size = 3]
4199 
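      For example, the call made on P0 above would be (a sketch; comm is assumed to contain both processes):
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
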
4200 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4201           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4202 @*/
4203 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4204 {
4205   PetscErrorCode ierr;
4206 
4207   PetscFunctionBegin;
4208   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4209   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4210   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4211   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4212   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4213   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4214   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4215   PetscFunctionReturn(0);
4216 }
4217 
4218 /*@
4219      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4220          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4221 
4222    Collective
4223 
4224    Input Parameters:
4225 +  mat - the matrix
4226 .  m - number of local rows (Cannot be PETSC_DECIDE)
4227 .  n - This value should be the same as the local size used in creating the
4228        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4229        it calculated if N is given). For square matrices n is almost always m.
4230 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4231 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4232 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4233 .  J - column indices
4234 -  v - matrix values
4235 
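   A sketch of typical use; the i and j arrays from the creation call are reused unchanged, while vnew (a hypothetical array of the same length as v) carries the new values:
.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,v,&A);
     /* ... the numerical values change, the sparsity pattern does not ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,vnew);
.ve
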
4236    Level: intermediate
4237 
4238 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4239           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4240 @*/
4241 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4242 {
4243   PetscErrorCode ierr;
4244   PetscInt       cstart,nnz,i,j;
4245   PetscInt       *ld;
4246   PetscBool      nooffprocentries;
4247   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4248   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4249   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4250   const PetscInt *Adi = Ad->i;
4251   PetscInt       ldi,Iii,md;
4252 
4253   PetscFunctionBegin;
4254   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4255   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4256   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4257   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4258 
4259   cstart = mat->cmap->rstart;
4260   if (!Aij->ld) {
4261     /* count number of entries below block diagonal */
4262     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4263     Aij->ld = ld;
4264     for (i=0; i<m; i++) {
4265       nnz  = Ii[i+1]- Ii[i];
4266       j     = 0;
4267       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] to avoid out-of-range access */
4268       J    += nnz;
4269       ld[i] = j;
4270     }
4271   } else {
4272     ld = Aij->ld;
4273   }
4274 
4275   for (i=0; i<m; i++) {
4276     nnz  = Ii[i+1]- Ii[i];
4277     Iii  = Ii[i];
4278     ldi  = ld[i];
4279     md   = Adi[i+1]-Adi[i];
4280     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4281     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4282     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4283     ad  += md;
4284     ao  += nnz - md;
4285   }
4286   nooffprocentries      = mat->nooffprocentries;
4287   mat->nooffprocentries = PETSC_TRUE;
4288   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4289   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4290   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4291   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4292   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4293   mat->nooffprocentries = nooffprocentries;
4294   PetscFunctionReturn(0);
4295 }
4296 
4297 /*@C
4298    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4299    (the default parallel PETSc format).  For good matrix assembly performance
4300    the user should preallocate the matrix storage by setting the parameters
4301    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4302    performance can be increased by more than a factor of 50.
4303 
4304    Collective
4305 
4306    Input Parameters:
4307 +  comm - MPI communicator
4308 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4309            This value should be the same as the local size used in creating the
4310            y vector for the matrix-vector product y = Ax.
4311 .  n - This value should be the same as the local size used in creating the
4312        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4313        it calculated if N is given). For square matrices n is almost always m.
4314 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4315 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4316 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4317            (same value is used for all local rows)
4318 .  d_nnz - array containing the number of nonzeros in the various rows of the
4319            DIAGONAL portion of the local submatrix (possibly different for each row)
4320            or NULL, if d_nz is used to specify the nonzero structure.
4321            The size of this array is equal to the number of local rows, i.e 'm'.
4322 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4323            submatrix (same value is used for all local rows).
4324 -  o_nnz - array containing the number of nonzeros in the various rows of the
4325            OFF-DIAGONAL portion of the local submatrix (possibly different for
4326            each row) or NULL, if o_nz is used to specify the nonzero
4327            structure. The size of this array is equal to the number
4328            of local rows, i.e 'm'.
4329 
4330    Output Parameter:
4331 .  A - the matrix
4332 
4333    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4334    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4335    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4336 
4337    Notes:
4338    If the *_nnz parameter is given then the *_nz parameter is ignored
4339 
4340    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4341    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4342    storage requirements for this matrix.
4343 
4344    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4345    processor than it must be used on all processors that share the object for
4346    that argument.
4347 
4348    The user MUST specify either the local or global matrix dimensions
4349    (possibly both).
4350 
4351    The parallel matrix is partitioned across processors such that the
4352    first m0 rows belong to process 0, the next m1 rows belong to
4353    process 1, the next m2 rows belong to process 2 etc.. where
4354    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4355    values corresponding to an [m x N] submatrix.
4356 
4357    The columns are logically partitioned with the n0 columns belonging
4358    to 0th partition, the next n1 columns belonging to the next
4359    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4360 
4361    The DIAGONAL portion of the local submatrix on any given processor
4362    is the submatrix corresponding to the rows and columns m,n
4363    corresponding to the given processor. i.e diagonal matrix on
4364    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4365    etc. The remaining portion of the local submatrix [m x (N-n)]
4366    constitute the OFF-DIAGONAL portion. The example below better
4367    illustrates this concept.
4368 
4369    For a square global matrix we define each processor's diagonal portion
4370    to be its local rows and the corresponding columns (a square submatrix);
4371    each processor's off-diagonal portion encompasses the remainder of the
4372    local matrix (a rectangular submatrix).
4373 
4374    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4375 
4376    When calling this routine with a single process communicator, a matrix of
4377    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4378    type of communicator, use the construction mechanism
4379 .vb
4380      MatCreate(...,&A);
4381      MatSetType(A,MATMPIAIJ);
4382      MatSetSizes(A, m,n,M,N);
4383      MatMPIAIJSetPreallocation(A,...);
4384 .ve
4385 
4388    By default, this format uses inodes (identical nodes) when possible.
4389    We search for consecutive rows with the same nonzero structure, thereby
4390    reusing matrix information to achieve increased efficiency.
4391 
4392    Options Database Keys:
4393 +  -mat_no_inode  - Do not use inodes
4394 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4395 
4398    Example usage:
4399 
4400    Consider the following 8x8 matrix with 34 non-zero values, that is
4401    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4402    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4403    as follows:
4404 
4405 .vb
4406             1  2  0  |  0  3  0  |  0  4
4407     Proc0   0  5  6  |  7  0  0  |  8  0
4408             9  0 10  | 11  0  0  | 12  0
4409     -------------------------------------
4410            13  0 14  | 15 16 17  |  0  0
4411     Proc1   0 18  0  | 19 20 21  |  0  0
4412             0  0  0  | 22 23  0  | 24  0
4413     -------------------------------------
4414     Proc2  25 26 27  |  0  0 28  | 29  0
4415            30  0  0  | 31 32 33  |  0 34
4416 .ve
4417 
4418    This can be represented as a collection of submatrices as
4419 
4420 .vb
4421       A B C
4422       D E F
4423       G H I
4424 .ve
4425 
4426    Where the submatrices A,B,C are owned by proc0, D,E,F are
4427    owned by proc1, G,H,I are owned by proc2.
4428 
4429    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4430    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4431    The 'M','N' parameters are 8,8, and have the same values on all procs.
4432 
4433    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4434    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4435    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4436    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4437    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4438    matrix, and [DF] as another SeqAIJ matrix.
4439 
4440    When d_nz, o_nz parameters are specified, d_nz storage elements are
4441    allocated for every row of the local diagonal submatrix, and o_nz
4442    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4443    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4444    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4445    In this case, the values of d_nz,o_nz are
4446 .vb
4447      proc0 : dnz = 2, o_nz = 2
4448      proc1 : dnz = 3, o_nz = 2
4449      proc2 : dnz = 1, o_nz = 4
4450 .ve
4451    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4452    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4453    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4454    34 values.
4455 
4456    When d_nnz, o_nnz parameters are specified, the storage is specified
4457    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4458    In the above case the values for d_nnz,o_nnz are
4459 .vb
4460      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4461      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4462      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4463 .ve
4464    Here the space allocated is the sum of all the above values, i.e., 34, and
4465    hence pre-allocation is perfect.
4466 
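   Putting this together, the creation call on proc1 in the example above could be (a sketch using the per-row counts listed above):
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
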
4467    Level: intermediate
4468 
4469 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4470           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4471 @*/
4472 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4473 {
4474   PetscErrorCode ierr;
4475   PetscMPIInt    size;
4476 
4477   PetscFunctionBegin;
4478   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4479   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4480   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4481   if (size > 1) {
4482     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4483     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4484   } else {
4485     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4486     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4487   }
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 /*@C
4492   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4493 
4494   Not collective
4495 
4496   Input Parameter:
4497 . A - The MPIAIJ matrix
4498 
4499   Output Parameters:
4500 + Ad - The local diagonal block as a SeqAIJ matrix
4501 . Ao - The local off-diagonal block as a SeqAIJ matrix
4502 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4503 
4504   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4505   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4506   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4507   local column numbers to global column numbers in the original matrix.
4508 
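  A sketch of recovering the global column index of an entry of Ao via colmap:
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* the global column of local column c of Ao is colmap[c] */
.ve
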
4509   Level: intermediate
4510 
4511 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4512 @*/
4513 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4514 {
4515   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4516   PetscBool      flg;
4517   PetscErrorCode ierr;
4518 
4519   PetscFunctionBegin;
4520   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4521   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4522   if (Ad)     *Ad     = a->A;
4523   if (Ao)     *Ao     = a->B;
4524   if (colmap) *colmap = a->garray;
4525   PetscFunctionReturn(0);
4526 }
4527 
4528 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4529 {
4530   PetscErrorCode ierr;
4531   PetscInt       m,N,i,rstart,nnz,Ii;
4532   PetscInt       *indx;
4533   PetscScalar    *values;
4534 
4535   PetscFunctionBegin;
4536   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4537   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4538     PetscInt       *dnz,*onz,sum,bs,cbs;
4539 
4540     if (n == PETSC_DECIDE) {
4541       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4542     }
4543     /* Check sum(n) = N */
4544     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4545     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4546 
4547     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4548     rstart -= m;
4549 
4550     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4551     for (i=0; i<m; i++) {
4552       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4553       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4554       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4555     }
4556 
4557     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4558     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4559     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4560     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4561     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4562     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4563     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4564     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4565   }
4566 
4567   /* numeric phase */
4568   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4569   for (i=0; i<m; i++) {
4570     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4571     Ii   = i + rstart;
4572     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4573     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4574   }
4575   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4576   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4577   PetscFunctionReturn(0);
4578 }
4579 
4580 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4581 {
4582   PetscErrorCode    ierr;
4583   PetscMPIInt       rank;
4584   PetscInt          m,N,i,rstart,nnz;
4585   size_t            len;
4586   const PetscInt    *indx;
4587   PetscViewer       out;
4588   char              *name;
4589   Mat               B;
4590   const PetscScalar *values;
4591 
4592   PetscFunctionBegin;
4593   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4594   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4595   /* Should this be the type of the diagonal block of A? */
4596   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4597   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4598   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4599   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4600   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4601   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4602   for (i=0; i<m; i++) {
4603     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4604     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4605     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4606   }
4607   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4608   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4609 
4610   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4611   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4612   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4613   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4614   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4615   ierr = PetscFree(name);CHKERRQ(ierr);
4616   ierr = MatView(B,out);CHKERRQ(ierr);
4617   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4618   ierr = MatDestroy(&B);CHKERRQ(ierr);
4619   PetscFunctionReturn(0);
4620 }
4621 
4622 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4623 {
4624   PetscErrorCode      ierr;
4625   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4626 
4627   PetscFunctionBegin;
4628   if (!merge) PetscFunctionReturn(0);
4629   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4630   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4631   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4632   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4633   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4634   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4635   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4636   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4637   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4638   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4639   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4640   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4641   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4642   ierr = PetscFree(merge);CHKERRQ(ierr);
4643   PetscFunctionReturn(0);
4644 }
4645 
4646 #include <../src/mat/utils/freespace.h>
4647 #include <petscbt.h>
4648 
4649 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4650 {
4651   PetscErrorCode      ierr;
4652   MPI_Comm            comm;
4653   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4654   PetscMPIInt         size,rank,taga,*len_s;
4655   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4656   PetscInt            proc,m;
4657   PetscInt            **buf_ri,**buf_rj;
4658   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4659   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4660   MPI_Request         *s_waits,*r_waits;
4661   MPI_Status          *status;
4662   MatScalar           *aa=a->a;
4663   MatScalar           **abuf_r,*ba_i;
4664   Mat_Merge_SeqsToMPI *merge;
4665   PetscContainer      container;
4666 
4667   PetscFunctionBegin;
4668   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4669   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4670 
4671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4672   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4673 
4674   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4675   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4676   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4677 
4678   bi     = merge->bi;
4679   bj     = merge->bj;
4680   buf_ri = merge->buf_ri;
4681   buf_rj = merge->buf_rj;
4682 
4683   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4684   owners = merge->rowmap->range;
4685   len_s  = merge->len_s;
4686 
4687   /* send and recv matrix values */
4688   /*-----------------------------*/
4689   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4690   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4691 
4692   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4693   for (proc=0,k=0; proc<size; proc++) {
4694     if (!len_s[proc]) continue;
4695     i    = owners[proc];
4696     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4697     k++;
4698   }
4699 
4700   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4701   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4702   ierr = PetscFree(status);CHKERRQ(ierr);
4703 
4704   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4705   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4706 
4707   /* insert mat values of mpimat */
4708   /*----------------------------*/
4709   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4710   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4711 
4712   for (k=0; k<merge->nrecv; k++) {
4713     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4714     nrows       = *(buf_ri_k[k]);
4715     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4716     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4717   }
4718 
4719   /* set values of ba */
4720   m = merge->rowmap->n;
4721   for (i=0; i<m; i++) {
4722     arow = owners[rank] + i;
4723     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4724     bnzi = bi[i+1] - bi[i];
4725     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4726 
4727     /* add local non-zero vals of this proc's seqmat into ba */
4728     anzi   = ai[arow+1] - ai[arow];
4729     aj     = a->j + ai[arow];
4730     aa     = a->a + ai[arow];
4731     nextaj = 0;
4732     for (j=0; nextaj<anzi; j++) {
4733       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4734         ba_i[j] += aa[nextaj++];
4735       }
4736     }
4737 
4738     /* add received vals into ba */
4739     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4740       /* i-th row */
4741       if (i == *nextrow[k]) {
4742         anzi   = *(nextai[k]+1) - *nextai[k];
4743         aj     = buf_rj[k] + *(nextai[k]);
4744         aa     = abuf_r[k] + *(nextai[k]);
4745         nextaj = 0;
4746         for (j=0; nextaj<anzi; j++) {
4747           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4748             ba_i[j] += aa[nextaj++];
4749           }
4750         }
4751         nextrow[k]++; nextai[k]++;
4752       }
4753     }
4754     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4755   }
4756   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4757   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4758 
4759   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4760   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4761   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4762   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4763   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4764   PetscFunctionReturn(0);
4765 }
4766 
4767 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4768 {
4769   PetscErrorCode      ierr;
4770   Mat                 B_mpi;
4771   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4772   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4773   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4774   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4775   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4776   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4777   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4778   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4779   MPI_Status          *status;
4780   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4781   PetscBT             lnkbt;
4782   Mat_Merge_SeqsToMPI *merge;
4783   PetscContainer      container;
4784 
4785   PetscFunctionBegin;
4786   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4787 
4788   /* make sure it is a PETSc comm */
4789   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4790   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4791   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4792 
4793   ierr = PetscNew(&merge);CHKERRQ(ierr);
4794   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4795 
4796   /* determine row ownership */
4797   /*---------------------------------------------------------*/
4798   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4799   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4800   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4801   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4802   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4803   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4804   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4805 
4806   m      = merge->rowmap->n;
4807   owners = merge->rowmap->range;
4808 
4809   /* determine the number of messages to send, their lengths */
4810   /*---------------------------------------------------------*/
4811   len_s = merge->len_s;
4812 
4813   len          = 0; /* length of buf_si[] */
4814   merge->nsend = 0;
4815   for (proc=0; proc<size; proc++) {
4816     len_si[proc] = 0;
4817     if (proc == rank) {
4818       len_s[proc] = 0;
4819     } else {
4820       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4821       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4822     }
4823     if (len_s[proc]) {
4824       merge->nsend++;
4825       nrows = 0;
4826       for (i=owners[proc]; i<owners[proc+1]; i++) {
4827         if (ai[i+1] > ai[i]) nrows++;
4828       }
4829       len_si[proc] = 2*(nrows+1);
4830       len         += len_si[proc];
4831     }
4832   }
4833 
4834   /* determine the number and length of messages to receive for ij-structure */
4835   /*-------------------------------------------------------------------------*/
4836   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4837   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4838 
4839   /* post the Irecv of j-structure */
4840   /*-------------------------------*/
4841   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4842   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4843 
4844   /* post the Isend of j-structure */
4845   /*--------------------------------*/
4846   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4847 
4848   for (proc=0, k=0; proc<size; proc++) {
4849     if (!len_s[proc]) continue;
4850     i    = owners[proc];
4851     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4852     k++;
4853   }
4854 
4855   /* receives and sends of j-structure are complete */
4856   /*------------------------------------------------*/
4857   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4858   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4859 
4860   /* send and recv i-structure */
4861   /*---------------------------*/
4862   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4863   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4864 
4865   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4866   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4867   for (proc=0,k=0; proc<size; proc++) {
4868     if (!len_s[proc]) continue;
4869     /* form outgoing message for i-structure:
4870          buf_si[0]:                 nrows to be sent
4871                [1:nrows]:           row index (global)
4872                [nrows+1:2*nrows+1]: i-structure index
4873     */
4874     /*-------------------------------------------*/
4875     nrows       = len_si[proc]/2 - 1;
4876     buf_si_i    = buf_si + nrows+1;
4877     buf_si[0]   = nrows;
4878     buf_si_i[0] = 0;
4879     nrows       = 0;
4880     for (i=owners[proc]; i<owners[proc+1]; i++) {
4881       anzi = ai[i+1] - ai[i];
4882       if (anzi) {
4883         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4884         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4885         nrows++;
4886       }
4887     }
4888     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4889     k++;
4890     buf_si += len_si[proc];
4891   }
4892 
4893   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4894   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4895 
4896   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4897   for (i=0; i<merge->nrecv; i++) {
4898     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4899   }
4900 
4901   ierr = PetscFree(len_si);CHKERRQ(ierr);
4902   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4903   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4904   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4905   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4906   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4907   ierr = PetscFree(status);CHKERRQ(ierr);
4908 
4909   /* compute a local seq matrix in each processor */
4910   /*----------------------------------------------*/
4911   /* allocate bi array and free space for accumulating nonzero column info */
4912   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4913   bi[0] = 0;
4914 
4915   /* create and initialize a linked list */
4916   nlnk = N+1;
4917   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4918 
4919   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4920   len  = ai[owners[rank+1]] - ai[owners[rank]];
4921   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4922 
4923   current_space = free_space;
4924 
4925   /* determine symbolic info for each local row */
4926   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4927 
4928   for (k=0; k<merge->nrecv; k++) {
4929     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4930     nrows       = *buf_ri_k[k];
4931     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4932     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4933   }
4934 
4935   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4936   len  = 0;
4937   for (i=0; i<m; i++) {
4938     bnzi = 0;
4939     /* add local non-zero cols of this proc's seqmat into lnk */
4940     arow  = owners[rank] + i;
4941     anzi  = ai[arow+1] - ai[arow];
4942     aj    = a->j + ai[arow];
4943     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4944     bnzi += nlnk;
4945     /* add received col data into lnk */
4946     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4947       if (i == *nextrow[k]) { /* i-th row */
4948         anzi  = *(nextai[k]+1) - *nextai[k];
4949         aj    = buf_rj[k] + *nextai[k];
4950         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4951         bnzi += nlnk;
4952         nextrow[k]++; nextai[k]++;
4953       }
4954     }
4955     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4956 
4957     /* if free space is not available, make more free space */
4958     if (current_space->local_remaining<bnzi) {
4959       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4960       nspacedouble++;
4961     }
4962     /* copy data into free space, then initialize lnk */
4963     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4964     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4965 
4966     current_space->array           += bnzi;
4967     current_space->local_used      += bnzi;
4968     current_space->local_remaining -= bnzi;
4969 
4970     bi[i+1] = bi[i] + bnzi;
4971   }
4972 
4973   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4974 
4975   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4976   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4977   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4978 
4979   /* create symbolic parallel matrix B_mpi */
4980   /*---------------------------------------*/
4981   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4982   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4983   if (n==PETSC_DECIDE) {
4984     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4985   } else {
4986     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4987   }
4988   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4989   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4990   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4991   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4992   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4993 
4994   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4995   B_mpi->assembled  = PETSC_FALSE;
4996   merge->bi         = bi;
4997   merge->bj         = bj;
4998   merge->buf_ri     = buf_ri;
4999   merge->buf_rj     = buf_rj;
5000   merge->coi        = NULL;
5001   merge->coj        = NULL;
5002   merge->owners_co  = NULL;
5003 
5004   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5005 
5006   /* attach the supporting struct to B_mpi for reuse */
5007   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5008   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5009   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5010   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5011   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5012   *mpimat = B_mpi;
5013 
5014   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 /*@C
5019       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5020                  matrices from each processor
5021 
5022     Collective
5023 
5024    Input Parameters:
5025 +    comm - the communicator the parallel matrix will live on
5026 .    seqmat - the input sequential matrix (one per process)
5027 .    m - number of local rows (or PETSC_DECIDE)
5028 .    n - number of local columns (or PETSC_DECIDE)
5029 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5030 
5031    Output Parameter:
5032 .    mpimat - the parallel matrix generated
5033 
5034     Level: advanced
5035 
5036    Notes:
5037      The dimensions of the sequential matrix on each process MUST be the same.
5038      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5039      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5040 @*/
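/*
   Example (a minimal usage sketch, not from the PETSc test suite; seqmat is assumed to be a
   fully assembled MATSEQAIJ matrix with the same dimensions on every process):

      Mat seqmat,mpimat;
      ... build and assemble seqmat on each process ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... change the numerical values of seqmat, keeping its nonzero pattern ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/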
5041 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5042 {
5043   PetscErrorCode ierr;
5044   PetscMPIInt    size;
5045 
5046   PetscFunctionBegin;
5047   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5048   if (size == 1) {
5049     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5050     if (scall == MAT_INITIAL_MATRIX) {
5051       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5052     } else {
5053       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5054     }
5055     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5056     PetscFunctionReturn(0);
5057   }
5058   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5059   if (scall == MAT_INITIAL_MATRIX) {
5060     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5061   }
5062   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5063   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5064   PetscFunctionReturn(0);
5065 }
5066 
5067 /*@
5068      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5069           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5070           with MatGetSize().
5071 
5072     Not Collective
5073 
5074    Input Parameters:
5075 +    A - the matrix
5076 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5077 
5078    Output Parameter:
5079 .    A_loc - the local sequential matrix generated
5080 
5081     Level: developer
5082 
5083    Notes:
5084      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5085      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5086      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5087      modify the values of the returned A_loc.
5088 
5089 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5090 
5091 @*/
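/*
   Example (a minimal usage sketch; A is assumed to be an assembled MATMPIAIJ matrix):

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use A_loc: the local rows of A over the global column space ...
      ... after the values of A change (same nonzero pattern) ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/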
5092 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5093 {
5094   PetscErrorCode ierr;
5095   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5096   Mat_SeqAIJ     *mat,*a,*b;
5097   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5098   MatScalar      *aa,*ba,*cam;
5099   PetscScalar    *ca;
5100   PetscMPIInt    size;
5101   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5102   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5103   PetscBool      match;
5104 
5105   PetscFunctionBegin;
5106   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5107   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5108   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5109   if (size == 1) {
5110     if (scall == MAT_INITIAL_MATRIX) {
5111       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5112       *A_loc = mpimat->A;
5113     } else if (scall == MAT_REUSE_MATRIX) {
5114       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5115     }
5116     PetscFunctionReturn(0);
5117   }
5118 
5119   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5120   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5121   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5122   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5123   aa = a->a; ba = b->a;
5124   if (scall == MAT_INITIAL_MATRIX) {
5125     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5126     ci[0] = 0;
5127     for (i=0; i<am; i++) {
5128       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5129     }
5130     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5131     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5132     k    = 0;
5133     for (i=0; i<am; i++) {
5134       ncols_o = bi[i+1] - bi[i];
5135       ncols_d = ai[i+1] - ai[i];
5136       /* off-diagonal portion of A */
5137       for (jo=0; jo<ncols_o; jo++) {
5138         col = cmap[*bj];
5139         if (col >= cstart) break;
5140         cj[k]   = col; bj++;
5141         ca[k++] = *ba++;
5142       }
5143       /* diagonal portion of A */
5144       for (j=0; j<ncols_d; j++) {
5145         cj[k]   = cstart + *aj++;
5146         ca[k++] = *aa++;
5147       }
5148       /* off-diagonal portion of A */
5149       for (j=jo; j<ncols_o; j++) {
5150         cj[k]   = cmap[*bj++];
5151         ca[k++] = *ba++;
5152       }
5153     }
5154     /* put together the new matrix */
5155     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5156     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5157     /* Since these are PETSc arrays, change flags to free them as necessary. */
5158     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5159     mat->free_a  = PETSC_TRUE;
5160     mat->free_ij = PETSC_TRUE;
5161     mat->nonew   = 0;
5162   } else if (scall == MAT_REUSE_MATRIX) {
5163     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5164     ci = mat->i; cj = mat->j; cam = mat->a;
5165     for (i=0; i<am; i++) {
5166       /* off-diagonal portion of A */
5167       ncols_o = bi[i+1] - bi[i];
5168       for (jo=0; jo<ncols_o; jo++) {
5169         col = cmap[*bj];
5170         if (col >= cstart) break;
5171         *cam++ = *ba++; bj++;
5172       }
5173       /* diagonal portion of A */
5174       ncols_d = ai[i+1] - ai[i];
5175       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5176       /* off-diagonal portion of A */
5177       for (j=jo; j<ncols_o; j++) {
5178         *cam++ = *ba++; bj++;
5179       }
5180     }
5181   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5182   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
5185 
5186 /*@C
5187      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5188 
5189     Not Collective
5190 
5191    Input Parameters:
5192 +    A - the matrix
5193 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5194 -    row, col - index sets of rows and columns to extract (or NULL)
5195 
5196    Output Parameter:
5197 .    A_loc - the local sequential matrix generated
5198 
5199     Level: developer
5200 
5201 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5202 
5203 @*/
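/*
   Example (a minimal usage sketch; A is assumed to be an assembled MATMPIAIJ matrix, and
   NULL is passed for row and col so all local rows and all nonzero columns are taken):

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ... use the condensed local matrix ...
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/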
5204 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5205 {
5206   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5207   PetscErrorCode ierr;
5208   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5209   IS             isrowa,iscola;
5210   Mat            *aloc;
5211   PetscBool      match;
5212 
5213   PetscFunctionBegin;
5214   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5215   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5216   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5217   if (!row) {
5218     start = A->rmap->rstart; end = A->rmap->rend;
5219     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5220   } else {
5221     isrowa = *row;
5222   }
5223   if (!col) {
5224     start = A->cmap->rstart;
5225     cmap  = a->garray;
5226     nzA   = a->A->cmap->n;
5227     nzB   = a->B->cmap->n;
5228     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5229     ncols = 0;
5230     for (i=0; i<nzB; i++) {
5231       if (cmap[i] < start) idx[ncols++] = cmap[i];
5232       else break;
5233     }
5234     imark = i;
5235     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5236     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5237     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5238   } else {
5239     iscola = *col;
5240   }
5241   if (scall != MAT_INITIAL_MATRIX) {
5242     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5243     aloc[0] = *A_loc;
5244   }
5245   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5246   if (!col) { /* attach global id of condensed columns */
5247     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5248   }
5249   *A_loc = aloc[0];
5250   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5251   if (!row) {
5252     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5253   }
5254   if (!col) {
5255     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5256   }
5257   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5258   PetscFunctionReturn(0);
5259 }
5260 
5261 /*
5262  * Create a sequential AIJ matrix based on row indices; a whole row (all of its columns) is extracted once a row index is matched.
5263  * A row could be local or remote. The routine is designed to be scalable in memory, so that nothing is sized based
5264  * on a global dimension.
5265  * */
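/*
   Usage sketch (illustrative only; this is a private routine, normally reached through
   MatGetBrowsOfAcols_MPIXAIJ() below; nrows and rowidx are placeholders for the caller's data,
   and rows holds global row indices of P, possibly owned by other processes):

      IS  rows;
      Mat P_oth = NULL;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rowidx,PETSC_COPY_VALUES,&rows);CHKERRQ(ierr);
      ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,&P_oth);CHKERRQ(ierr);
      ierr = ISDestroy(&rows);CHKERRQ(ierr);
*/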
5266 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5267 {
5268   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5269   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5270   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5271   PetscMPIInt              owner;
5272   PetscSFNode              *iremote,*oiremote;
5273   const PetscInt           *lrowindices;
5274   PetscErrorCode           ierr;
5275   PetscSF                  sf,osf;
5276   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5277   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5278   MPI_Comm                 comm;
5279   ISLocalToGlobalMapping   mapping;
5280 
5281   PetscFunctionBegin;
5282   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5283   /* plocalsize is the number of roots
5284    * nrows is the number of leaves
5285    * */
5286   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5287   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5288   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5289   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5290   for (i=0;i<nrows;i++) {
5291     /* Find a remote index and an owner for a row
5292      * The row could be local or remote
5293      * */
5294     owner = 0;
5295     lidx  = 0;
5296     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5297     iremote[i].index = lidx;
5298     iremote[i].rank  = owner;
5299   }
5300   /* Create SF to communicate how many nonzero columns for each row */
5301   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5302   /* SF will figure out the number of nonzero columns for each row, and their
5303    * offsets
5304    * */
5305   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5306   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5307   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5308 
5309   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5310   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5311   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5312   roffsets[0] = 0;
5313   roffsets[1] = 0;
5314   for (i=0;i<plocalsize;i++) {
5315     /* diag */
5316     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5317     /* off diag */
5318     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5319     /* compute offsets so that we know the relative location of each row */
5320     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5321     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5322   }
5323   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5324   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5325   /* 'r' means root, and 'l' means leaf */
5326   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5327   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5328   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5329   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5330   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5331   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5332   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5333   dntotalcols = 0;
5334   ontotalcols = 0;
5335   ncol = 0;
5336   for (i=0;i<nrows;i++) {
5337     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5338     ncol = PetscMax(pnnz[i],ncol);
5339     /* diag */
5340     dntotalcols += nlcols[i*2+0];
5341     /* off diag */
5342     ontotalcols += nlcols[i*2+1];
5343   }
5344   /* We do not need to figure out the right number of columns
5345    * since all the calculations will be done by going through the raw data
5346    * */
5347   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5348   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5349   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5350   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5351   /* diag */
5352   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5353   /* off diag */
5354   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5355   /* diag */
5356   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5357   /* off diag */
5358   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5359   dntotalcols = 0;
5360   ontotalcols = 0;
5361   ntotalcols  = 0;
5362   for (i=0;i<nrows;i++) {
5363     owner = 0;
5364     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5365     /* Set iremote for diag matrix */
5366     for (j=0;j<nlcols[i*2+0];j++) {
5367       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5368       iremote[dntotalcols].rank    = owner;
5369       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5370       ilocal[dntotalcols++]        = ntotalcols++;
5371     }
5372     /* off diag */
5373     for (j=0;j<nlcols[i*2+1];j++) {
5374       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5375       oiremote[ontotalcols].rank    = owner;
5376       oilocal[ontotalcols++]        = ntotalcols++;
5377     }
5378   }
5379   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5380   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5381   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5382   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5383   /* P serves as roots and P_oth as leaves
5384    * Diag matrix
5385    * */
5386   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5387   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5388   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5389 
5390   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5391   /* Off diag */
5392   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5393   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5394   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5395   /* We operate on the matrix's internal data to save memory */
5396   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5397   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5398   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5399   /* Convert to global indices for diag matrix */
5400   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5401   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5402   /* We want P_oth to store global indices */
5403   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5404   /* Use memory scalable approach */
5405   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5406   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5407   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5408   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5409   /* Convert back to local indices */
5410   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5411   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5412   nout = 0;
5413   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5414   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D \n",po->i[plocalsize],nout);
5415   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5416   /* Exchange values */
5417   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5418   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5419   /* Stop PETSc from shrinking memory */
5420   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5421   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5422   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5423   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5424   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5425   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5426   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5427   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5428   PetscFunctionReturn(0);
5429 }
5430 
5431 /*
5432  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5433  * This supports MPIAIJ and MAIJ.
5434  * */
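/*
   Usage sketch (illustrative; A and P are assumed to be assembled MATMPIAIJ matrices and
   dof is 1 for plain AIJ):

      Mat P_oth = NULL;
      ierr = MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_INITIAL_MATRIX,&P_oth);CHKERRQ(ierr);
      ... P changes numerically but keeps its nonzero pattern ...
      ierr = MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_REUSE_MATRIX,&P_oth);CHKERRQ(ierr);
      ierr = MatDestroy(&P_oth);CHKERRQ(ierr);
*/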
5435 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5436 {
5437   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5438   Mat_SeqAIJ            *p_oth;
5439   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5440   IS                    rows,map;
5441   PetscHMapI            hamp;
5442   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5443   MPI_Comm              comm;
5444   PetscSF               sf,osf;
5445   PetscBool             has;
5446   PetscErrorCode        ierr;
5447 
5448   PetscFunctionBegin;
5449   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5450   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5451   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5452    *  and then create a submatrix (that often is an overlapping matrix)
5453    * */
5454   if (reuse == MAT_INITIAL_MATRIX) {
5455     /* Use a hash table to figure out unique keys */
5456     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5457     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5458     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5459     count = 0;
5460     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5461     for (i=0;i<a->B->cmap->n;i++) {
5462       key  = a->garray[i]/dof;
5463       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5464       if (!has) {
5465         mapping[i] = count;
5466         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5467       } else {
5468         /* Current 'i' has the same value as the previous step */
5469         mapping[i] = count-1;
5470       }
5471     }
5472     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5473     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5474     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5475     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5476     off = 0;
5477     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5478     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5479     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5480     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5481     /* In case the matrix was already created but the user wants to recreate it */
5482     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5483     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5484     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5485     ierr = ISDestroy(&map);CHKERRQ(ierr);
5486     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5487   } else if (reuse == MAT_REUSE_MATRIX) {
5488     /* If the matrix was already created, we simply update the values using the SF objects
5489      * that were attached to the matrix earlier.
5490      *  */
5491     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5492     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5493     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5494     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5495     /* Update values in place */
5496     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5497     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5498     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5499     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5500   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5501   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /*@C
5506     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5507 
5508     Collective on Mat
5509 
5510    Input Parameters:
5511 +    A,B - the matrices in mpiaij format
5512 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5513 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5514 
5515    Output Parameter:
5516 +    rowb, colb - index sets of rows and columns of B to extract
5517 -    B_seq - the sequential matrix generated
5518 
5519     Level: developer
5520 
5521 @*/
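/*
   Example (a minimal usage sketch; A and B are assumed to be assembled MATMPIAIJ matrices with
   compatible layouts; the index sets and B_seq created on the first call are reused later):

      IS  rowb = NULL,colb = NULL;
      Mat B_seq = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... values of B change, nonzero pattern unchanged ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/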
5522 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5523 {
5524   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5525   PetscErrorCode ierr;
5526   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5527   IS             isrowb,iscolb;
5528   Mat            *bseq=NULL;
5529 
5530   PetscFunctionBegin;
5531   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5532     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5533   }
5534   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5535 
5536   if (scall == MAT_INITIAL_MATRIX) {
5537     start = A->cmap->rstart;
5538     cmap  = a->garray;
5539     nzA   = a->A->cmap->n;
5540     nzB   = a->B->cmap->n;
5541     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5542     ncols = 0;
5543     for (i=0; i<nzB; i++) {  /* row < local row index */
5544       if (cmap[i] < start) idx[ncols++] = cmap[i];
5545       else break;
5546     }
5547     imark = i;
5548     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5549     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5550     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5551     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5552   } else {
5553     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5554     isrowb  = *rowb; iscolb = *colb;
5555     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5556     bseq[0] = *B_seq;
5557   }
5558   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5559   *B_seq = bseq[0];
5560   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5561   if (!rowb) {
5562     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5563   } else {
5564     *rowb = isrowb;
5565   }
5566   if (!colb) {
5567     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5568   } else {
5569     *colb = iscolb;
5570   }
5571   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5572   PetscFunctionReturn(0);
5573 }
5574 
5575 /*
5576     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5577     of the OFF-DIAGONAL portion of local A
5578 
5579     Collective on Mat
5580 
5581    Input Parameters:
5582 +    A,B - the matrices in mpiaij format
5583 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5584 
5585    Output Parameter:
5586 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5587 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5588 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5589 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5590 
5591     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5592      for this matrix. This is not desirable.
5593 
5594     Level: developer
5595 
5596 */
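/*
   Usage sketch (illustrative; the startsj and bufa arrays created on the first call must be passed
   back unchanged when MAT_REUSE_MATRIX is requested; how they are freed here is a sketch detail):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... values of B change, nonzero pattern unchanged ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/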
5597 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5598 {
5599   PetscErrorCode         ierr;
5600   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5601   Mat_SeqAIJ             *b_oth;
5602   VecScatter             ctx;
5603   MPI_Comm               comm;
5604   const PetscMPIInt      *rprocs,*sprocs;
5605   const PetscInt         *srow,*rstarts,*sstarts;
5606   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5607   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5608   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5609   MPI_Request            *rwaits = NULL,*swaits = NULL;
5610   MPI_Status             rstatus;
5611   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5612 
5613   PetscFunctionBegin;
5614   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5615   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5616 
5617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5618     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5619   }
5620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5622 
5623   if (size == 1) {
5624     startsj_s = NULL;
5625     bufa_ptr  = NULL;
5626     *B_oth    = NULL;
5627     PetscFunctionReturn(0);
5628   }
5629 
5630   ctx = a->Mvctx;
5631   tag = ((PetscObject)ctx)->tag;
5632 
5633   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5634   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5635   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5636   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5637   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5638   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5639   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5640 
5641   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5642   if (scall == MAT_INITIAL_MATRIX) {
5643     /* i-array */
5644     /*---------*/
5645     /*  post receives */
5646     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5647     for (i=0; i<nrecvs; i++) {
5648       rowlen = rvalues + rstarts[i]*rbs;
5649       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5650       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5651     }
5652 
5653     /* pack the outgoing message */
5654     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5655 
5656     sstartsj[0] = 0;
5657     rstartsj[0] = 0;
5658     len         = 0; /* total length of j or a array to be sent */
5659     if (nsends) {
5660       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5661       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5662     }
5663     for (i=0; i<nsends; i++) {
5664       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5665       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5666       for (j=0; j<nrows; j++) {
5667         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5668         for (l=0; l<sbs; l++) {
5669           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5670 
5671           rowlen[j*sbs+l] = ncols;
5672 
5673           len += ncols;
5674           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5675         }
5676         k++;
5677       }
5678       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5679 
5680       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5681     }
5682     /* recvs and sends of i-array are completed */
5683     i = nrecvs;
5684     while (i--) {
5685       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5686     }
5687     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5688     ierr = PetscFree(svalues);CHKERRQ(ierr);
5689 
5690     /* allocate buffers for sending j and a arrays */
5691     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5692     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5693 
5694     /* create i-array of B_oth */
5695     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5696 
5697     b_othi[0] = 0;
5698     len       = 0; /* total length of j or a array to be received */
5699     k         = 0;
5700     for (i=0; i<nrecvs; i++) {
5701       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5702       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5703       for (j=0; j<nrows; j++) {
5704         b_othi[k+1] = b_othi[k] + rowlen[j];
5705         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5706         k++;
5707       }
5708       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5709     }
5710     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5711 
5712     /* allocate space for the j and a arrays of B_oth */
5713     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5714     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5715 
5716     /* j-array */
5717     /*---------*/
5718     /*  post receives of j-array */
5719     for (i=0; i<nrecvs; i++) {
5720       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5721       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5722     }
5723 
5724     /* pack the outgoing message j-array */
5725     if (nsends) k = sstarts[0];
5726     for (i=0; i<nsends; i++) {
5727       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5728       bufJ  = bufj+sstartsj[i];
5729       for (j=0; j<nrows; j++) {
5730         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5731         for (ll=0; ll<sbs; ll++) {
5732           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5733           for (l=0; l<ncols; l++) {
5734             *bufJ++ = cols[l];
5735           }
5736           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5737         }
5738       }
5739       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5740     }
5741 
5742     /* recvs and sends of j-array are completed */
5743     i = nrecvs;
5744     while (i--) {
5745       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5746     }
5747     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5748   } else if (scall == MAT_REUSE_MATRIX) {
5749     sstartsj = *startsj_s;
5750     rstartsj = *startsj_r;
5751     bufa     = *bufa_ptr;
5752     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5753     b_otha   = b_oth->a;
5754   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5755 
5756   /* a-array */
5757   /*---------*/
5758   /*  post receives of a-array */
5759   for (i=0; i<nrecvs; i++) {
5760     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5761     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5762   }
5763 
5764   /* pack the outgoing message a-array */
5765   if (nsends) k = sstarts[0];
5766   for (i=0; i<nsends; i++) {
5767     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5768     bufA  = bufa+sstartsj[i];
5769     for (j=0; j<nrows; j++) {
5770       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5771       for (ll=0; ll<sbs; ll++) {
5772         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5773         for (l=0; l<ncols; l++) {
5774           *bufA++ = vals[l];
5775         }
5776         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5777       }
5778     }
5779     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5780   }
5781   /* recvs and sends of a-array are completed */
5782   i = nrecvs;
5783   while (i--) {
5784     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5785   }
5786   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5787   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5788 
5789   if (scall == MAT_INITIAL_MATRIX) {
5790     /* put together the new matrix */
5791     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5792 
5793     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5794     /* Since these are PETSc arrays, change flags to free them as necessary. */
5795     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5796     b_oth->free_a  = PETSC_TRUE;
5797     b_oth->free_ij = PETSC_TRUE;
5798     b_oth->nonew   = 0;
5799 
5800     ierr = PetscFree(bufj);CHKERRQ(ierr);
5801     if (!startsj_s || !bufa_ptr) {
5802       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5803       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5804     } else {
5805       *startsj_s = sstartsj;
5806       *startsj_r = rstartsj;
5807       *bufa_ptr  = bufa;
5808     }
5809   }
5810 
5811   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5812   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5813   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5814   PetscFunctionReturn(0);
5815 }
5816 
5817 /*@C
5818   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5819 
5820   Not Collective
5821 
5822   Input Parameters:
5823 . A - The matrix in mpiaij format
5824 
5825   Output Parameter:
5826 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5827 . colmap - A map from global column index to local index into lvec
5828 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5829 
5830   Level: developer
5831 
5832 @*/
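/*
   Example (a minimal usage sketch; A is assumed to be an assembled MATMPIAIJ matrix; the returned
   objects are owned by the matrix and must not be destroyed by the caller):

      Vec        lvec;
      VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
*/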
5833 #if defined(PETSC_USE_CTABLE)
5834 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5835 #else
5836 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5837 #endif
5838 {
5839   Mat_MPIAIJ *a;
5840 
5841   PetscFunctionBegin;
5842   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5843   PetscValidPointer(lvec, 2);
5844   PetscValidPointer(colmap, 3);
5845   PetscValidPointer(multScatter, 4);
5846   a = (Mat_MPIAIJ*) A->data;
5847   if (lvec) *lvec = a->lvec;
5848   if (colmap) *colmap = a->colmap;
5849   if (multScatter) *multScatter = a->Mvctx;
5850   PetscFunctionReturn(0);
5851 }
5852 
5853 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5854 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5855 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5856 #if defined(PETSC_HAVE_MKL_SPARSE)
5857 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5858 #endif
5859 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5860 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5861 #if defined(PETSC_HAVE_ELEMENTAL)
5862 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5863 #endif
5864 #if defined(PETSC_HAVE_SCALAPACK)
5865 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5866 #endif
5867 #if defined(PETSC_HAVE_HYPRE)
5868 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5869 #endif
5870 #if defined(PETSC_HAVE_CUDA)
5871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5872 #endif
5873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5874 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5875 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5876 
5877 /*
5878     Computes (B'*A')' since computing A*B directly is untenable
5879 
5880                n                       p                          p
5881         [             ]       [             ]         [                 ]
5882       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5883         [             ]       [             ]         [                 ]
5884 
5885 */
5886 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5887 {
5888   PetscErrorCode ierr;
5889   Mat            At,Bt,Ct;
5890 
5891   PetscFunctionBegin;
5892   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5893   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5894   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5895   ierr = MatDestroy(&At);CHKERRQ(ierr);
5896   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5897   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5898   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5899   PetscFunctionReturn(0);
5900 }
5901 
5902 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5903 {
5904   PetscErrorCode ierr;
5905   PetscBool      cisdense;
5906 
5907   PetscFunctionBegin;
5908   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
5909   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5910   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5911   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5912   if (!cisdense) {
5913     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5914   }
5915   ierr = MatSetUp(C);CHKERRQ(ierr);
5916 
5917   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5918   PetscFunctionReturn(0);
5919 }
5920 
5921 /* ----------------------------------------------------------------*/
5922 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5923 {
5924   Mat_Product *product = C->product;
5925   Mat         A = product->A,B=product->B;
5926 
5927   PetscFunctionBegin;
5928   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5929     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5930 
5931   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5932   C->ops->productsymbolic = MatProductSymbolic_AB;
5933   PetscFunctionReturn(0);
5934 }
5935 
5936 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5937 {
5938   PetscErrorCode ierr;
5939   Mat_Product    *product = C->product;
5940 
5941   PetscFunctionBegin;
5942   if (product->type == MATPRODUCT_AB) {
5943     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5944   }
5945   PetscFunctionReturn(0);
5946 }
5947 /* ----------------------------------------------------------------*/
5948 
5949 /*MC
5950    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5951 
5952    Options Database Keys:
5953 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5954 
5955    Level: beginner
5956 
5957    Notes:
5958     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5959     in this case the values associated with the rows and columns one passes in are set to zero
5960     in the matrix.
5961 
5962     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5963     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5964 
5965 .seealso: MatCreateAIJ()
5966 M*/
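/*
   Example (a minimal sketch of creating a matrix of this type; the sizes and preallocation
   numbers are illustrative):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/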
5967 
5968 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5969 {
5970   Mat_MPIAIJ     *b;
5971   PetscErrorCode ierr;
5972   PetscMPIInt    size;
5973 
5974   PetscFunctionBegin;
5975   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5976 
5977   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5978   B->data       = (void*)b;
5979   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5980   B->assembled  = PETSC_FALSE;
5981   B->insertmode = NOT_SET_VALUES;
5982   b->size       = size;
5983 
5984   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5985 
5986   /* build cache for off-process array entries formed */
5987   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5988 
5989   b->donotstash  = PETSC_FALSE;
5990   b->colmap      = NULL;
5991   b->garray      = NULL;
5992   b->roworiented = PETSC_TRUE;
5993 
5994   /* stuff used for matrix vector multiply */
5995   b->lvec  = NULL;
5996   b->Mvctx = NULL;
5997 
5998   /* stuff for MatGetRow() */
5999   b->rowindices   = NULL;
6000   b->rowvalues    = NULL;
6001   b->getrowactive = PETSC_FALSE;
6002 
6003   /* flexible pointer used in CUSP/CUSPARSE classes */
6004   b->spptr = NULL;
6005 
6006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6016 #if defined(PETSC_HAVE_MKL_SPARSE)
6017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6018 #endif
6019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6022 #if defined(PETSC_HAVE_ELEMENTAL)
6023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6024 #endif
6025 #if defined(PETSC_HAVE_SCALAPACK)
6026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6027 #endif
6028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6030 #if defined(PETSC_HAVE_HYPRE)
6031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6033 #endif
6034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6036   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6037   PetscFunctionReturn(0);
6038 }
6039 
6040 /*@C
6041      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6042          and "off-diagonal" part of the matrix in CSR format.
6043 
6044    Collective
6045 
6046    Input Parameters:
6047 +  comm - MPI communicator
6048 .  m - number of local rows (Cannot be PETSC_DECIDE)
6049 .  n - This value should be the same as the local size used in creating the
6050        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6051        calculated if N is given). For square matrices n is almost always m.
6052 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6053 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6054 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6055 .   j - column indices
6056 .   a - matrix values
6057 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6058 .   oj - column indices
6059 -   oa - matrix values
6060 
6061    Output Parameter:
6062 .   mat - the matrix
6063 
6064    Level: advanced
6065 
6066    Notes:
6067        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6068        must free the arrays once the matrix has been destroyed and not before.
6069 
6070        The i and j indices are 0 based
6071 
6072        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6073 
6074        This sets local rows and cannot be used to set off-processor values.
6075 
6076        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6077        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6078        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6079        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6080        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6081        communication if it is known that only local entries will be set.
6082 
6083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6084           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6085 @*/
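/*
   Example data layout (a minimal sketch; the numerical values are illustrative). For a 4x4 matrix
   distributed over two processes with 2 rows and 2 columns per process, where rank 0 owns the
   entries (0,0)=1, (0,2)=2 and (1,1)=3, rank 0 would pass m = n = 2, M = N = 4 and

      i[]  = {0,1,2};   j[]  = {0,1};   a[]  = {1.0,3.0};    "diagonal" block, local column indices
      oi[] = {0,1,1};   oj[] = {2};     oa[] = {2.0};        "off-diagonal" block, global column indices

   since the diagonal block is built as an m-by-n sequential matrix and the off-diagonal block as an
   m-by-N sequential matrix in the routine below.
*/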
6086 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6087 {
6088   PetscErrorCode ierr;
6089   Mat_MPIAIJ     *maij;
6090 
6091   PetscFunctionBegin;
6092   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6093   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6094   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6095   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6096   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6097   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6098   maij = (Mat_MPIAIJ*) (*mat)->data;
6099 
6100   (*mat)->preallocated = PETSC_TRUE;
6101 
6102   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6103   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6104 
6105   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6106   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6107 
6108   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6109   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6110   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6111   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6112 
6113   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6114   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6115   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6116   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6117   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6118   PetscFunctionReturn(0);
6119 }
6120 
6121 /*
6122     Special version for direct calls from Fortran
6123 */
6124 #include <petsc/private/fortranimpl.h>
6125 
6126 /* Change these macros so they can be used in a void function */
6127 #undef CHKERRQ
6128 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6129 #undef SETERRQ2
6130 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6131 #undef SETERRQ3
6132 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6133 #undef SETERRQ
6134 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6135 
6136 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6137 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6138 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6139 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6140 #else
6141 #endif
6142 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6143 {
6144   Mat            mat  = *mmat;
6145   PetscInt       m    = *mm, n = *mn;
6146   InsertMode     addv = *maddv;
6147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6148   PetscScalar    value;
6149   PetscErrorCode ierr;
6150 
6151   MatCheckPreallocated(mat,1);
6152   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6153   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6154   {
6155     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6156     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6157     PetscBool roworiented = aij->roworiented;
6158 
6159     /* Some Variables required in the macro */
6160     Mat        A                    = aij->A;
6161     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6162     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6163     MatScalar  *aa                  = a->a;
6164     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6165     Mat        B                    = aij->B;
6166     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6167     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6168     MatScalar  *ba                  = b->a;
6169     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6170      * cannot use "#if defined" inside a macro. */
6171     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6172 
6173     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6174     PetscInt  nonew = a->nonew;
6175     MatScalar *ap1,*ap2;
6176 
6177     PetscFunctionBegin;
6178     for (i=0; i<m; i++) {
6179       if (im[i] < 0) continue;
6180       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6181       if (im[i] >= rstart && im[i] < rend) {
6182         row      = im[i] - rstart;
6183         lastcol1 = -1;
6184         rp1      = aj + ai[row];
6185         ap1      = aa + ai[row];
6186         rmax1    = aimax[row];
6187         nrow1    = ailen[row];
6188         low1     = 0;
6189         high1    = nrow1;
6190         lastcol2 = -1;
6191         rp2      = bj + bi[row];
6192         ap2      = ba + bi[row];
6193         rmax2    = bimax[row];
6194         nrow2    = bilen[row];
6195         low2     = 0;
6196         high2    = nrow2;
6197 
6198         for (j=0; j<n; j++) {
6199           if (roworiented) value = v[i*n+j];
6200           else value = v[i+j*m];
6201           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6202           if (in[j] >= cstart && in[j] < cend) {
6203             col = in[j] - cstart;
6204             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6205 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6206             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6207 #endif
6208           } else if (in[j] < 0) continue;
6209           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6210             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6211             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6212           } else {
6213             if (mat->was_assembled) {
6214               if (!aij->colmap) {
6215                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6216               }
6217 #if defined(PETSC_USE_CTABLE)
6218               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6219               col--;
6220 #else
6221               col = aij->colmap[in[j]] - 1;
6222 #endif
6223               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6224                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6225                 col  =  in[j];
6226                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6227                 B        = aij->B;
6228                 b        = (Mat_SeqAIJ*)B->data;
6229                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6230                 rp2      = bj + bi[row];
6231                 ap2      = ba + bi[row];
6232                 rmax2    = bimax[row];
6233                 nrow2    = bilen[row];
6234                 low2     = 0;
6235                 high2    = nrow2;
6236                 bm       = aij->B->rmap->n;
6237                 ba       = b->a;
6238                 inserted = PETSC_FALSE;
6239               }
6240             } else col = in[j];
6241             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6242 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6243             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6244 #endif
6245           }
6246         }
6247       } else if (!aij->donotstash) {
6248         if (roworiented) {
6249           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6250         } else {
6251           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6252         }
6253       }
6254     }
6255   }
6256   PetscFunctionReturnVoid();
6257 }
6258