1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
23    automatically switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
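
/*
   A minimal usage sketch of the recommendation above; M, N, and the nonzero
   estimates d_nz and o_nz are placeholders chosen by the caller, not values
   taken from this file:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);            used on a single process
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);  used on multiple processes
*/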
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
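
/*
   A minimal sketch of selecting this type through the options database; the
   matrix A and its sizes are placeholders, not values taken from this file:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and run the program with -mat_type aijcrl.
*/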
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
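/*
   A sketch of the intended call pattern; seqA and mlocal are placeholders, not
   values taken from this file:

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,seqA,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   seqA must be a valid Mat on every process; its entries are read only on
   process 0, where it must be a square MATSEQAIJ.  The mlocal values give the
   local row counts and must sum to the global size across the communicator.
*/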
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal nonzeros in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal nonzeros in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 has an order-N integer array) but is fast to access.
426 */
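/*
   For example (hypothetical data, not taken from this file): if the
   off-diagonal block has garray = {3,7,12}, global columns 3, 7, and 12 map to
   local columns 0, 1, and 2.  Both variants store the local index plus one
   (e.g. column 3 -> 1), so a zero lookup result means "column not present";
   callers subtract one after the lookup.
*/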
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
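/*
   The two macros below insert one value into the diagonal (A) or off-diagonal
   (B) block.  Both use the same search strategy over the sorted column indices
   of the current row: binary search narrows the window while it is larger than
   five entries, then a linear scan finishes; if the column is not found, a new
   nonzero is created (subject to the nonew setting) and the later entries in
   the row are shifted up to make room.
*/
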
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
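  /* v is assumed to hold the entire local row in ascending global column order:
     the off-diagonal (B) entries left of the diagonal block, then the diagonal (A)
     block, then the remaining off-diagonal entries */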
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
671 */
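/*
   For example (hypothetical data, not taken from this file): on a process with
   cstart = 4 and cend = 8, a row with sorted global columns {1,5,6,9} is split
   into diagonal-block columns {5-4,6-4} = {1,2} (so ailen for that row is 2)
   and off-diagonal columns {1,9}, which are kept as global indices (bilen 2).
*/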
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840 
841         i = j;
842       }
843     }
844     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
845   }
846 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
847   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
848   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
849   if (mat->boundtocpu) {
850     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
851     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
852   }
853 #endif
854   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
855   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
856 
857   /* determine if any process has disassembled; if so we must
858      also disassemble ourselves, so that we may reassemble */
859   /*
860      if the nonzero structure of submatrix B cannot change then we know that
861      no process disassembled, and thus we can skip this step
862   */
863   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
864     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
865     if (mat->was_assembled && !other_disassembled) {
866 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
867       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
868 #endif
869       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
870     }
871   }
872   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
873     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
874   }
875   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
876 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
877   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
878 #endif
879   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
880   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
881 
882   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
883 
884   aij->rowvalues = 0;
885 
886   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
887   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
888 
889   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
890   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
891     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
892     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
893   }
894 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
895   mat->offloadmask = PETSC_OFFLOAD_BOTH;
896 #endif
897   PetscFunctionReturn(0);
898 }
899 
900 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
901 {
902   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
903   PetscErrorCode ierr;
904 
905   PetscFunctionBegin;
906   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
907   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
914   PetscObjectState sA, sB;
915   PetscInt        *lrows;
916   PetscInt         r, len;
917   PetscBool        cong, lch, gch;
918   PetscErrorCode   ierr;
919 
920   PetscFunctionBegin;
921   /* get locally owned rows */
922   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
923   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
924   /* fix right hand side if needed */
925   if (x && b) {
926     const PetscScalar *xx;
927     PetscScalar       *bb;
928 
929     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
930     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
931     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
932     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
933     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
934     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
935   }
936 
937   sA = mat->A->nonzerostate;
938   sB = mat->B->nonzerostate;
939 
940   if (diag != 0.0 && cong) {
941     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
942     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
943   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
944     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
945     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
946     PetscInt   nnwA, nnwB;
947     PetscBool  nnzA, nnzB;
948 
949     nnwA = aijA->nonew;
950     nnwB = aijB->nonew;
951     nnzA = aijA->keepnonzeropattern;
952     nnzB = aijB->keepnonzeropattern;
953     if (!nnzA) {
954       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
955       aijA->nonew = 0;
956     }
957     if (!nnzB) {
958       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
959       aijB->nonew = 0;
960     }
961     /* Must zero here before the next loop */
962     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
964     for (r = 0; r < len; ++r) {
965       const PetscInt row = lrows[r] + A->rmap->rstart;
966       if (row >= A->cmap->N) continue;
967       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
968     }
969     aijA->nonew = nnwA;
970     aijB->nonew = nnwB;
971   } else {
972     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
974   }
975   ierr = PetscFree(lrows);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978 
979   /* reduce nonzerostate */
980   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
981   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
982   if (gch) A->nonzerostate++;
983   PetscFunctionReturn(0);
984 }
985 
986 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
987 {
988   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode    ierr;
990   PetscMPIInt       n = A->rmap->n;
991   PetscInt          i,j,r,m,len = 0;
992   PetscInt          *lrows,*owners = A->rmap->range;
993   PetscMPIInt       p = 0;
994   PetscSFNode       *rrows;
995   PetscSF           sf;
996   const PetscScalar *xx;
997   PetscScalar       *bb,*mask;
998   Vec               xmask,lmask;
999   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
1000   const PetscInt    *aj, *ii,*ridx;
1001   PetscScalar       *aa;
1002 
1003   PetscFunctionBegin;
1004   /* Create SF where leaves are input rows and roots are owned rows */
1005   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1006   for (r = 0; r < n; ++r) lrows[r] = -1;
1007   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1008   for (r = 0; r < N; ++r) {
1009     const PetscInt idx   = rows[r];
1010     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1011     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1012       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1013     }
1014     rrows[r].rank  = p;
1015     rrows[r].index = rows[r] - owners[p];
1016   }
1017   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1018   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1019   /* Collect flags for rows to be zeroed */
1020   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1022   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
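  /* lrows[] was initialized to -1; after the reduction each owned row that some
     process asked to zero holds a nonnegative value, and the compression loop
     below converts those marks into a list of local row numbers */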
1023   /* Compress and put in row numbers */
1024   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1025   /* zero diagonal part of matrix */
1026   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1027   /* handle off diagonal part of matrix */
1028   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1029   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1030   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1031   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1032   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1033   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1036   if (x && b) { /* this code is buggy when the row and column layout don't match */
1037     PetscBool cong;
1038 
1039     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1040     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1041     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1044     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1045   }
1046   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1047   /* remove zeroed rows of off diagonal matrix */
1048   ii = aij->i;
1049   for (i=0; i<len; i++) {
1050     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1051   }
1052   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1053   if (aij->compressedrow.use) {
1054     m    = aij->compressedrow.nrows;
1055     ii   = aij->compressedrow.i;
1056     ridx = aij->compressedrow.rindex;
1057     for (i=0; i<m; i++) {
1058       n  = ii[i+1] - ii[i];
1059       aj = aij->j + ii[i];
1060       aa = aij->a + ii[i];
1061 
1062       for (j=0; j<n; j++) {
1063         if (PetscAbsScalar(mask[*aj])) {
1064           if (b) bb[*ridx] -= *aa*xx[*aj];
1065           *aa = 0.0;
1066         }
1067         aa++;
1068         aj++;
1069       }
1070       ridx++;
1071     }
1072   } else { /* do not use compressed row format */
1073     m = l->B->rmap->n;
1074     for (i=0; i<m; i++) {
1075       n  = ii[i+1] - ii[i];
1076       aj = aij->j + ii[i];
1077       aa = aij->a + ii[i];
1078       for (j=0; j<n; j++) {
1079         if (PetscAbsScalar(mask[*aj])) {
1080           if (b) bb[i] -= *aa*xx[*aj];
1081           *aa = 0.0;
1082         }
1083         aa++;
1084         aj++;
1085       }
1086     }
1087   }
1088   if (x && b) {
1089     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1090     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1091   }
1092   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1093   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1094   ierr = PetscFree(lrows);CHKERRQ(ierr);
1095 
1096   /* only change matrix nonzero state if pattern was allowed to be changed */
1097   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1098     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1099     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1100   }
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       nt;
1109   VecScatter     Mvctx = a->Mvctx;
1110 
1111   PetscFunctionBegin;
1112   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1113   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1114 
1115   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1123 {
1124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136   VecScatter     Mvctx = a->Mvctx;
1137 
1138   PetscFunctionBegin;
1139   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1140   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1142   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1157   /* add partial results together */
1158   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1164 {
1165   MPI_Comm       comm;
1166   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1167   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1168   IS             Me,Notme;
1169   PetscErrorCode ierr;
1170   PetscInt       M,N,first,last,*notme,i;
1171   PetscBool      lf;
1172   PetscMPIInt    size;
1173 
1174   PetscFunctionBegin;
1175   /* Easy test: symmetric diagonal block */
1176   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1177   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1178   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1179   if (!*f) PetscFunctionReturn(0);
1180   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1182   if (size == 1) PetscFunctionReturn(0);
1183 
1184   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1185   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1186   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1187   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1188   for (i=0; i<first; i++) notme[i] = i;
1189   for (i=last; i<M; i++) notme[i-last+first] = i;
1190   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1191   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1192   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1193   Aoff = Aoffs[0];
1194   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1195   Boff = Boffs[0];
1196   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1198   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1200   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1201   ierr = PetscFree(notme);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1206 {
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   /* do nondiagonal part */
1221   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1222   /* do local part */
1223   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1224   /* add partial results together */
1225   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 /*
1231   This only works correctly for square matrices where the subblock A->A is the
1232    diagonal block
1233 */
1234 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1235 {
1236   PetscErrorCode ierr;
1237   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1238 
1239   PetscFunctionBegin;
1240   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1241   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1242   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1247 {
1248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1253   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1258 {
1259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263 #if defined(PETSC_USE_LOG)
1264   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1265 #endif
1266   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1267   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1269   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1270 #if defined(PETSC_USE_CTABLE)
1271   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1272 #else
1273   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1274 #endif
1275   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1276   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1278   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1279   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1280   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1281   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1282 
1283   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1293 #if defined(PETSC_HAVE_ELEMENTAL)
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1295 #endif
1296 #if defined(PETSC_HAVE_HYPRE)
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1299 #endif
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
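/*
   On-disk layout produced below (the standard PETSc binary matrix format):
     header : MAT_FILE_CLASSID, M (global rows), N (global cols), global nonzero count
     rowlens: M integers, the global row lengths
     colidxs: nz integers, global column indices, row by row, each row sorted by column
     matvals: nz scalars matching colidxs
   Block size options are written separately to the viewer's .info file.
*/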
1306 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1307 {
1308   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1309   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1310   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1311   const PetscInt    *garray = aij->garray;
1312   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1313   PetscInt          *rowlens;
1314   PetscInt          *colidxs;
1315   PetscScalar       *matvals;
1316   PetscErrorCode    ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1320 
1321   M  = mat->rmap->N;
1322   N  = mat->cmap->N;
1323   m  = mat->rmap->n;
1324   rs = mat->rmap->rstart;
1325   cs = mat->cmap->rstart;
1326   nz = A->nz + B->nz;
1327 
1328   /* write matrix header */
1329   header[0] = MAT_FILE_CLASSID;
1330   header[1] = M; header[2] = N; header[3] = nz;
1331   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1332   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1333 
1334   /* fill in and store row lengths  */
1335   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1336   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1337   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1338   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1339 
1340   /* fill in and store column indices */
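  /* merge order per row: off-diagonal columns left of the diagonal block, then
     the diagonal block, then the remaining off-diagonal columns, so the global
     column indices of each row come out sorted */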
1341   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1342   for (cnt=0, i=0; i<m; i++) {
1343     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1344       if (garray[B->j[jb]] > cs) break;
1345       colidxs[cnt++] = garray[B->j[jb]];
1346     }
1347     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1348       colidxs[cnt++] = A->j[ja] + cs;
1349     for (; jb<B->i[i+1]; jb++)
1350       colidxs[cnt++] = garray[B->j[jb]];
1351   }
1352   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1353   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1354   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1355 
1356   /* fill in and store nonzero values */
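  /* the values are emitted in the same merge order as the column indices above */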
1357   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1358   for (cnt=0, i=0; i<m; i++) {
1359     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1360       if (garray[B->j[jb]] > cs) break;
1361       matvals[cnt++] = B->a[jb];
1362     }
1363     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1364       matvals[cnt++] = A->a[ja];
1365     for (; jb<B->i[i+1]; jb++)
1366       matvals[cnt++] = B->a[jb];
1367   }
1368   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1369   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1370   ierr = PetscFree(matvals);CHKERRQ(ierr);
1371 
1372   /* write block size option to the viewer's .info file */
1373   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
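/*
   A minimal sketch of driving the binary writer above (the file name is only an
   illustration):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/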
1376 
1377 #include <petscdraw.h>
1378 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1379 {
1380   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1381   PetscErrorCode    ierr;
1382   PetscMPIInt       rank = aij->rank,size = aij->size;
1383   PetscBool         isdraw,iascii,isbinary;
1384   PetscViewer       sviewer;
1385   PetscViewerFormat format;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1391   if (iascii) {
1392     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1393     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1394       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1395       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1396       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397       for (i=0; i<(PetscInt)size; i++) {
1398         nmax = PetscMax(nmax,nz[i]);
1399         nmin = PetscMin(nmin,nz[i]);
1400         navg += nz[i];
1401       }
1402       ierr = PetscFree(nz);CHKERRQ(ierr);
1403       navg = navg/size;
1404       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1405       PetscFunctionReturn(0);
1406     }
1408     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1409       MatInfo   info;
      PetscInt  *inodes = NULL;
1411 
1412       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1413       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1415       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1416       if (!inodes) {
1417         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1418                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1419       } else {
1420         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1421                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1422       }
1423       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1424       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1425       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1426       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1427       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1429       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1430       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1431       PetscFunctionReturn(0);
1432     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1433       PetscInt inodecount,inodelimit,*inodes;
1434       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1435       if (inodes) {
1436         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1437       } else {
1438         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1439       }
1440       PetscFunctionReturn(0);
1441     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1442       PetscFunctionReturn(0);
1443     }
1444   } else if (isbinary) {
1445     if (size == 1) {
1446       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1447       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1448     } else {
1449       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1450     }
1451     PetscFunctionReturn(0);
1456   } else if (isdraw) {
1457     PetscDraw draw;
1458     PetscBool isnull;
1459     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1460     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1461     if (isnull) PetscFunctionReturn(0);
1462   }
1463 
1464   { /* assemble the entire matrix onto first processor */
1465     Mat A = NULL, Av;
1466     IS  isrow,iscol;
1467 
1468     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1469     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1470     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1471     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1472 /*  The commented code uses MatCreateSubMatrices instead */
1473 /*
1474     Mat *AA, A = NULL, Av;
1475     IS  isrow,iscol;
1476 
1477     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1478     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1479     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1480     if (!rank) {
1481        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1482        A    = AA[0];
1483        Av   = AA[0];
1484     }
1485     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1486 */
1487     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1488     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1489     /*
1490        Everyone has to call to draw the matrix since the graphics waits are
1491        synchronized across all processors that share the PetscDraw object
1492     */
1493     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1494     if (!rank) {
1495       if (((PetscObject)mat)->name) {
1496         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1497       }
1498       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1499     }
1500     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1501     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1502     ierr = MatDestroy(&A);CHKERRQ(ierr);
1503   }
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1508 {
1509   PetscErrorCode ierr;
1510   PetscBool      iascii,isdraw,issocket,isbinary;
1511 
1512   PetscFunctionBegin;
1513   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1514   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1515   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1516   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1517   if (iascii || isdraw || isbinary || issocket) {
1518     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1519   }
1520   PetscFunctionReturn(0);
1521 }
1522 
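/*
   Processor-local SOR: each outer iteration scatters the current solution into
   the ghost vector lvec, forms the locally corrected right-hand side
   bb1 = bb - B*lvec, and runs the requested sequential sweep on the diagonal
   block, i.e. SOR within each process with block-Jacobi coupling across processes.
*/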
1523 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1524 {
1525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1526   PetscErrorCode ierr;
1527   Vec            bb1 = 0;
1528   PetscBool      hasop;
1529 
1530   PetscFunctionBegin;
1531   if (flag == SOR_APPLY_UPPER) {
1532     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1533     PetscFunctionReturn(0);
1534   }
1535 
  if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1537     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1538   }
1539 
1540   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1541     if (flag & SOR_ZERO_INITIAL_GUESS) {
1542       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1543       its--;
1544     }
1545 
1546     while (its--) {
1547       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549 
1550       /* update rhs: bb1 = bb - B*x */
1551       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1552       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1553 
1554       /* local sweep */
1555       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1556     }
1557   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1558     if (flag & SOR_ZERO_INITIAL_GUESS) {
1559       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1560       its--;
1561     }
1562     while (its--) {
1563       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1565 
1566       /* update rhs: bb1 = bb - B*x */
1567       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1568       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1569 
1570       /* local sweep */
1571       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1572     }
1573   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1574     if (flag & SOR_ZERO_INITIAL_GUESS) {
1575       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1576       its--;
1577     }
1578     while (its--) {
1579       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1581 
1582       /* update rhs: bb1 = bb - B*x */
1583       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1584       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1585 
1586       /* local sweep */
1587       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1588     }
1589   } else if (flag & SOR_EISENSTAT) {
1590     Vec xx1;
1591 
1592     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1593     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1594 
1595     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597     if (!mat->diag) {
1598       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1599       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1600     }
1601     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1602     if (hasop) {
1603       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1604     } else {
1605       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1606     }
1607     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1608 
1609     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1610 
1611     /* local sweep */
1612     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1613     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1614     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1615   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1616 
1617   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1618 
1619   matin->factorerrortype = mat->A->factorerrortype;
1620   PetscFunctionReturn(0);
1621 }
1622 
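/*
   Permutation strategy: invert the row and column permutations with star forests
   to find where each local row, column, and ghost column lands, count the
   permuted diagonal/off-diagonal nonzeros per row for preallocation, and then
   insert the permuted entries with MatSetValues().
*/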
1623 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1624 {
1625   Mat            aA,aB,Aperm;
1626   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1627   PetscScalar    *aa,*ba;
1628   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1629   PetscSF        rowsf,sf;
1630   IS             parcolp = NULL;
1631   PetscBool      done;
1632   PetscErrorCode ierr;
1633 
1634   PetscFunctionBegin;
1635   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1636   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1637   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1638   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1639 
1640   /* Invert row permutation to find out where my rows should go */
1641   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1642   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1643   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1644   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1645   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1646   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1647 
1648   /* Invert column permutation to find out where my columns should go */
1649   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1650   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1651   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1652   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1653   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1654   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1656 
1657   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1658   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1659   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1660 
1661   /* Find out where my gcols should go */
1662   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1663   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1664   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1665   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1666   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1667   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1669   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1670 
1671   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1672   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1673   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1674   for (i=0; i<m; i++) {
1675     PetscInt    row = rdest[i];
1676     PetscMPIInt rowner;
1677     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1678     for (j=ai[i]; j<ai[i+1]; j++) {
1679       PetscInt    col = cdest[aj[j]];
1680       PetscMPIInt cowner;
1681       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1682       if (rowner == cowner) dnnz[i]++;
1683       else onnz[i]++;
1684     }
1685     for (j=bi[i]; j<bi[i+1]; j++) {
1686       PetscInt    col = gcdest[bj[j]];
1687       PetscMPIInt cowner;
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692   }
1693   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1694   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1695   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1696   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1697   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1698 
1699   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1700   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1701   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1702   for (i=0; i<m; i++) {
1703     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1704     PetscInt j0,rowlen;
1705     rowlen = ai[i+1] - ai[i];
1706     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1707       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1708       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1709     }
1710     rowlen = bi[i+1] - bi[i];
1711     for (j0=j=0; j<rowlen; j0=j) {
1712       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1713       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1714     }
1715   }
1716   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1717   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1719   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1720   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1721   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1722   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1723   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1724   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1725   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1726   *B = Aperm;
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1731 {
1732   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1733   PetscErrorCode ierr;
1734 
1735   PetscFunctionBegin;
1736   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1737   if (ghosts) *ghosts = aij->garray;
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A    = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscLogDouble isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
1749   info->block_size = 1.0;
1750   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751 
1752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1753   isend[3] = info->memory;  isend[4] = info->mallocs;
1754 
1755   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1756 
1757   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1758   isend[3] += info->memory;  isend[4] += info->mallocs;
1759   if (flag == MAT_LOCAL) {
1760     info->nz_used      = isend[0];
1761     info->nz_allocated = isend[1];
1762     info->nz_unneeded  = isend[2];
1763     info->memory       = isend[3];
1764     info->mallocs      = isend[4];
1765   } else if (flag == MAT_GLOBAL_MAX) {
1766     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1767 
1768     info->nz_used      = irecv[0];
1769     info->nz_allocated = irecv[1];
1770     info->nz_unneeded  = irecv[2];
1771     info->memory       = irecv[3];
1772     info->mallocs      = irecv[4];
1773   } else if (flag == MAT_GLOBAL_SUM) {
1774     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1775 
1776     info->nz_used      = irecv[0];
1777     info->nz_allocated = irecv[1];
1778     info->nz_unneeded  = irecv[2];
1779     info->memory       = irecv[3];
1780     info->mallocs      = irecv[4];
1781   }
1782   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1783   info->fill_ratio_needed = 0;
1784   info->factor_mallocs    = 0;
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1789 {
1790   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1791   PetscErrorCode ierr;
1792 
1793   PetscFunctionBegin;
1794   switch (op) {
1795   case MAT_NEW_NONZERO_LOCATIONS:
1796   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1797   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1798   case MAT_KEEP_NONZERO_PATTERN:
1799   case MAT_NEW_NONZERO_LOCATION_ERR:
1800   case MAT_USE_INODES:
1801   case MAT_IGNORE_ZERO_ENTRIES:
1802     MatCheckPreallocated(A,1);
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1805     break;
1806   case MAT_ROW_ORIENTED:
1807     MatCheckPreallocated(A,1);
1808     a->roworiented = flg;
1809 
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_NEW_DIAGONALS:
1814   case MAT_SORTED_FULL:
1815     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1816     break;
1817   case MAT_IGNORE_OFF_PROC_ENTRIES:
1818     a->donotstash = flg;
1819     break;
1820   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1821   case MAT_SPD:
1822   case MAT_SYMMETRIC:
1823   case MAT_STRUCTURALLY_SYMMETRIC:
1824   case MAT_HERMITIAN:
1825   case MAT_SYMMETRY_ETERNAL:
1826     break;
1827   case MAT_SUBMAT_SINGLEIS:
1828     A->submat_singleis = flg;
1829     break;
1830   case MAT_STRUCTURE_ONLY:
1831     /* The option is handled directly by MatSetOption() */
1832     break;
1833   default:
1834     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1835   }
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1840 {
1841   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1842   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1843   PetscErrorCode ierr;
1844   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1845   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1846   PetscInt       *cmap,*idx_p;
1847 
1848   PetscFunctionBegin;
1849   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1850   mat->getrowactive = PETSC_TRUE;
1851 
1852   if (!mat->rowvalues && (idx || v)) {
1853     /*
1854         allocate enough space to hold information from the longest row.
1855     */
1856     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1857     PetscInt   max = 1,tmp;
1858     for (i=0; i<matin->rmap->n; i++) {
1859       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1860       if (max < tmp) max = tmp;
1861     }
1862     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1863   }
1864 
1865   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1866   lrow = row - rstart;
1867 
1868   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1869   if (!v)   {pvA = 0; pvB = 0;}
1870   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1871   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1872   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1873   nztot = nzA + nzB;
1874 
1875   cmap = mat->garray;
1876   if (v  || idx) {
1877     if (nztot) {
1878       /* Sort by increasing column numbers, assuming A and B already sorted */
1879       PetscInt imark = -1;
1880       if (v) {
1881         *v = v_p = mat->rowvalues;
1882         for (i=0; i<nzB; i++) {
1883           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1884           else break;
1885         }
1886         imark = i;
1887         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1888         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1889       }
1890       if (idx) {
1891         *idx = idx_p = mat->rowindices;
1892         if (imark > -1) {
1893           for (i=0; i<imark; i++) {
1894             idx_p[i] = cmap[cworkB[i]];
1895           }
1896         } else {
1897           for (i=0; i<nzB; i++) {
1898             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1899             else break;
1900           }
1901           imark = i;
1902         }
1903         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1904         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1905       }
1906     } else {
1907       if (idx) *idx = 0;
1908       if (v)   *v   = 0;
1909     }
1910   }
1911   *nz  = nztot;
1912   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1913   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1918 {
1919   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1920 
1921   PetscFunctionBegin;
1922   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1923   aij->getrowactive = PETSC_FALSE;
1924   PetscFunctionReturn(0);
1925 }
1926 
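/*
   Norms are assembled from the two local blocks:
     NORM_FROBENIUS: sqrt of the globally summed |a_ij|^2
     NORM_1        : max_j of the global column sums sum_i |a_ij|
     NORM_INFINITY : max_i of the local row sums sum_j |a_ij|, reduced across processes with MPIU_MAX
*/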
1927 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1928 {
1929   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1930   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1931   PetscErrorCode ierr;
1932   PetscInt       i,j,cstart = mat->cmap->rstart;
1933   PetscReal      sum = 0.0;
1934   MatScalar      *v;
1935 
1936   PetscFunctionBegin;
1937   if (aij->size == 1) {
1938     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1939   } else {
1940     if (type == NORM_FROBENIUS) {
1941       v = amat->a;
1942       for (i=0; i<amat->nz; i++) {
1943         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1944       }
1945       v = bmat->a;
1946       for (i=0; i<bmat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1950       *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1952     } else if (type == NORM_1) { /* max column norm */
1953       PetscReal *tmp,*tmp2;
1954       PetscInt  *jj,*garray = aij->garray;
1955       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1956       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1957       *norm = 0.0;
1958       v     = amat->a; jj = amat->j;
1959       for (j=0; j<amat->nz; j++) {
1960         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1961       }
1962       v = bmat->a; jj = bmat->j;
1963       for (j=0; j<bmat->nz; j++) {
1964         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1965       }
1966       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1967       for (j=0; j<mat->cmap->N; j++) {
1968         if (tmp2[j] > *norm) *norm = tmp2[j];
1969       }
1970       ierr = PetscFree(tmp);CHKERRQ(ierr);
1971       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1972       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1973     } else if (type == NORM_INFINITY) { /* max row norm */
1974       PetscReal ntemp = 0.0;
1975       for (j=0; j<aij->A->rmap->n; j++) {
1976         v   = amat->a + amat->i[j];
1977         sum = 0.0;
1978         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1979           sum += PetscAbsScalar(*v); v++;
1980         }
1981         v = bmat->a + bmat->i[j];
1982         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         if (sum > ntemp) ntemp = sum;
1986       }
1987       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1988       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1989     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1990   }
1991   PetscFunctionReturn(0);
1992 }
1993 
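/*
   Transpose strategy: preallocate B = A^T by counting column occurrences (d_nnz
   directly from the diagonal block, o_nnz by reducing the off-diagonal counts to
   the owning processes with a star forest), transpose the diagonal block locally,
   and insert the off-diagonal entries at their transposed locations with MatSetValues().
*/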
1994 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1995 {
1996   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1997   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1998   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1999   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2000   PetscErrorCode  ierr;
2001   Mat             B,A_diag,*B_diag;
2002   const MatScalar *array;
2003 
2004   PetscFunctionBegin;
2005   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2006   ai = Aloc->i; aj = Aloc->j;
2007   bi = Bloc->i; bj = Bloc->j;
2008   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2009     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2010     PetscSFNode          *oloc;
2011     PETSC_UNUSED PetscSF sf;
2012 
2013     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2014     /* compute d_nnz for preallocation */
2015     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2016     for (i=0; i<ai[ma]; i++) {
2017       d_nnz[aj[i]]++;
2018     }
2019     /* compute local off-diagonal contributions */
2020     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2021     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2022     /* map those to global */
2023     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2024     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2025     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2026     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2027     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2028     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2029     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2030 
2031     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2032     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2033     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2034     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2035     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2036     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2037   } else {
2038     B    = *matout;
2039     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2040   }
2041 
2042   b           = (Mat_MPIAIJ*)B->data;
2043   A_diag      = a->A;
2044   B_diag      = &b->A;
2045   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2046   A_diag_ncol = A_diag->cmap->N;
2047   B_diag_ilen = sub_B_diag->ilen;
2048   B_diag_i    = sub_B_diag->i;
2049 
2050   /* Set ilen for diagonal of B */
2051   for (i=0; i<A_diag_ncol; i++) {
2052     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2053   }
2054 
  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (i.e. without using MatSetValues()) because all writes are local. */
2057   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2058 
2059   /* copy over the B part */
2060   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2061   array = Bloc->a;
2062   row   = A->rmap->rstart;
2063   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2064   cols_tmp = cols;
2065   for (i=0; i<mb; i++) {
2066     ncol = bi[i+1]-bi[i];
2067     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2068     row++;
2069     array += ncol; cols_tmp += ncol;
2070   }
2071   ierr = PetscFree(cols);CHKERRQ(ierr);
2072 
2073   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2076     *matout = B;
2077   } else {
2078     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2084 {
2085   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2086   Mat            a    = aij->A,b = aij->B;
2087   PetscErrorCode ierr;
2088   PetscInt       s1,s2,s3;
2089 
2090   PetscFunctionBegin;
2091   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2092   if (rr) {
2093     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2094     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2095     /* Overlap communication with computation. */
2096     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2097   }
2098   if (ll) {
2099     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2100     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2101     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2102   }
2103   /* scale  the diagonal block */
2104   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2105 
2106   if (rr) {
2107     /* Do a scatter end and then right scale the off-diagonal block */
2108     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2109     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2110   }
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2115 {
2116   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2117   PetscErrorCode ierr;
2118 
2119   PetscFunctionBegin;
2120   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2125 {
2126   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2127   Mat            a,b,c,d;
2128   PetscBool      flg;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   a = matA->A; b = matA->B;
2133   c = matB->A; d = matB->B;
2134 
2135   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2136   if (flg) {
2137     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2138   }
2139   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2144 {
2145   PetscErrorCode ierr;
2146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2147   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2148 
2149   PetscFunctionBegin;
2150   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2151   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-process part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices */
2157     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2158   } else {
2159     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2160     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2161   }
2162   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2167 {
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 /*
2176    Computes the number of nonzeros per row needed for preallocation when X and Y
2177    have different nonzero structure.
2178 */
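/* For example, with identity local-to-global maps, a row with columns {1,3} in X
   and columns {2,3} in Y yields nnz = 3, the size of the union {1,2,3}. */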
2179 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2180 {
2181   PetscInt       i,j,k,nzx,nzy;
2182 
2183   PetscFunctionBegin;
2184   /* Set the number of nonzeros in the new matrix */
2185   for (i=0; i<m; i++) {
2186     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2187     nzx = xi[i+1] - xi[i];
2188     nzy = yi[i+1] - yi[i];
2189     nnz[i] = 0;
2190     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2191       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2192       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2193       nnz[i]++;
2194     }
2195     for (; k<nzy; k++) nnz[i]++;
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2201 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2202 {
2203   PetscErrorCode ierr;
2204   PetscInt       m = Y->rmap->N;
2205   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2206   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 
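/*
   Y = Y + a*X: when the nonzero patterns coincide the update reduces to a BLAS
   axpy on the stored value arrays of the diagonal and off-diagonal blocks; for a
   subset pattern the basic kernel is used; otherwise a new matrix preallocated
   for the union pattern replaces Y in place.
*/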
2213 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2214 {
2215   PetscErrorCode ierr;
2216   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2217   PetscBLASInt   bnz,one=1;
2218   Mat_SeqAIJ     *x,*y;
2219 
2220   PetscFunctionBegin;
2221   if (str == SAME_NONZERO_PATTERN) {
2222     PetscScalar alpha = a;
2223     x    = (Mat_SeqAIJ*)xx->A->data;
2224     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2225     y    = (Mat_SeqAIJ*)yy->A->data;
2226     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2227     x    = (Mat_SeqAIJ*)xx->B->data;
2228     y    = (Mat_SeqAIJ*)yy->B->data;
2229     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
    /* the MatAXPY_Basic* subroutines call MatAssembly, so in the branches below the
       matrix on the GPU will be updated automatically; here the values were changed
       in place, so mark the CPU copy as the current one */
2234 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2235     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2236       Y->offloadmask = PETSC_OFFLOAD_CPU;
2237     }
2238 #endif
2239   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2240     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2241   } else {
2242     Mat      B;
2243     PetscInt *nnz_d,*nnz_o;
2244     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2245     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2246     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2247     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2248     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2249     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2250     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2254     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2255     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2263 
2264 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2265 {
2266 #if defined(PETSC_USE_COMPLEX)
2267   PetscErrorCode ierr;
2268   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2272   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2273 #else
2274   PetscFunctionBegin;
2275 #endif
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2280 {
2281   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2282   PetscErrorCode ierr;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2286   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2291 {
2292   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2297   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2302 {
2303   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2304   PetscErrorCode ierr;
2305   PetscInt       i,*idxb = 0;
2306   PetscScalar    *va,*vb;
2307   Vec            vtmp;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2311   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2312   if (idx) {
2313     for (i=0; i<A->rmap->n; i++) {
2314       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2315     }
2316   }
2317 
2318   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2319   if (idx) {
2320     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2321   }
2322   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2323   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2324 
2325   for (i=0; i<A->rmap->n; i++) {
2326     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2327       va[i] = vb[i];
2328       if (idx) idx[i] = a->garray[idxb[i]];
2329     }
2330   }
2331 
2332   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2333   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2334   ierr = PetscFree(idxb);CHKERRQ(ierr);
2335   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2340 {
2341   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2342   PetscErrorCode ierr;
2343   PetscInt       i,*idxb = 0;
2344   PetscScalar    *va,*vb;
2345   Vec            vtmp;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2349   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2350   if (idx) {
    for (i=0; i<A->rmap->n; i++) {
2352       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2353     }
2354   }
2355 
2356   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2357   if (idx) {
2358     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2359   }
2360   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2361   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2362 
2363   for (i=0; i<A->rmap->n; i++) {
2364     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2365       va[i] = vb[i];
2366       if (idx) idx[i] = a->garray[idxb[i]];
2367     }
2368   }
2369 
2370   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2372   ierr = PetscFree(idxb);CHKERRQ(ierr);
2373   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
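/*
   The row min/max below are computed blockwise: take the row extremum of the
   diagonal and off-diagonal blocks separately, then pick the winner for each row,
   mapping off-diagonal column indices back to global indices through garray.
*/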
2377 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2378 {
2379   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2380   PetscInt       n      = A->rmap->n;
2381   PetscInt       cstart = A->cmap->rstart;
2382   PetscInt       *cmap  = mat->garray;
2383   PetscInt       *diagIdx, *offdiagIdx;
2384   Vec            diagV, offdiagV;
2385   PetscScalar    *a, *diagA, *offdiagA;
2386   PetscInt       r;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2393   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2394   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2395   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2396   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2397   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2398   for (r = 0; r < n; ++r) {
    if (PetscRealPart(diagA[r]) <= PetscRealPart(offdiagA[r])) {
2400       a[r]   = diagA[r];
2401       idx[r] = cstart + diagIdx[r];
2402     } else {
2403       a[r]   = offdiagA[r];
2404       idx[r] = cmap[offdiagIdx[r]];
2405     }
2406   }
2407   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2408   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2409   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2410   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2411   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2412   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2417 {
2418   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2419   PetscInt       n      = A->rmap->n;
2420   PetscInt       cstart = A->cmap->rstart;
2421   PetscInt       *cmap  = mat->garray;
2422   PetscInt       *diagIdx, *offdiagIdx;
2423   Vec            diagV, offdiagV;
2424   PetscScalar    *a, *diagA, *offdiagA;
2425   PetscInt       r;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2430   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2432   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2434   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2435   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2436   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2437   for (r = 0; r < n; ++r) {
    if (PetscRealPart(diagA[r]) >= PetscRealPart(offdiagA[r])) {
2439       a[r]   = diagA[r];
2440       idx[r] = cstart + diagIdx[r];
2441     } else {
2442       a[r]   = offdiagA[r];
2443       idx[r] = cmap[offdiagIdx[r]];
2444     }
2445   }
2446   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2447   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2449   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2450   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2451   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2456 {
2457   PetscErrorCode ierr;
2458   Mat            *dummy;
2459 
2460   PetscFunctionBegin;
2461   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2462   *newmat = *dummy;
2463   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2468 {
2469   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2470   PetscErrorCode ierr;
2471 
2472   PetscFunctionBegin;
2473   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2474   A->factorerrortype = a->A->factorerrortype;
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2479 {
2480   PetscErrorCode ierr;
2481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2482 
2483   PetscFunctionBegin;
2484   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2485   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2486   if (x->assembled) {
2487     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2488   } else {
2489     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2490   }
2491   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2492   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2497 {
2498   PetscFunctionBegin;
2499   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2500   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2506 
2507    Collective on Mat
2508 
2509    Input Parameters:
2510 +    A - the matrix
-    sc - PETSC_TRUE to use the scalable algorithm (the default is to not use it)

   Level: advanced
2514 
2515 @*/
2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2517 {
2518   PetscErrorCode       ierr;
2519 
2520   PetscFunctionBegin;
2521   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
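/*
   A minimal sketch of enabling the scalable algorithm before an overlap
   computation (the matrix A and the index set array is/nis are assumed to exist):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,1);CHKERRQ(ierr);

   The same switch is available from the options database as -mat_increase_overlap_scalable.
*/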
2524 
2525 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2526 {
2527   PetscErrorCode       ierr;
2528   PetscBool            sc = PETSC_FALSE,flg;
2529 
2530   PetscFunctionBegin;
2531   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2532   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2533   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2534   if (flg) {
2535     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2536   }
2537   ierr = PetscOptionsTail();CHKERRQ(ierr);
2538   PetscFunctionReturn(0);
2539 }
2540 
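/*
   MatShift needs every diagonal entry to be insertable: an unpreallocated matrix
   or an empty diagonal block is first preallocated with one entry per row so that
   the basic shift kernel does not trigger a new-nonzero allocation error.
*/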
2541 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2542 {
2543   PetscErrorCode ierr;
2544   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2545   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2546 
2547   PetscFunctionBegin;
2548   if (!Y->preallocated) {
2549     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2550   } else if (!aij->nz) {
2551     PetscInt nonew = aij->nonew;
2552     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2553     aij->nonew = nonew;
2554   }
2555   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2560 {
2561   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2562   PetscErrorCode ierr;
2563 
2564   PetscFunctionBegin;
2565   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2566   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2567   if (d) {
2568     PetscInt rstart;
2569     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2570     *d += rstart;
2572   }
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2577 {
2578   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2579   PetscErrorCode ierr;
2580 
2581   PetscFunctionBegin;
2582   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 /* -------------------------------------------------------------------*/
2587 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2588                                        MatGetRow_MPIAIJ,
2589                                        MatRestoreRow_MPIAIJ,
2590                                        MatMult_MPIAIJ,
2591                                 /* 4*/ MatMultAdd_MPIAIJ,
2592                                        MatMultTranspose_MPIAIJ,
2593                                        MatMultTransposeAdd_MPIAIJ,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                 /*10*/ 0,
2598                                        0,
2599                                        0,
2600                                        MatSOR_MPIAIJ,
2601                                        MatTranspose_MPIAIJ,
2602                                 /*15*/ MatGetInfo_MPIAIJ,
2603                                        MatEqual_MPIAIJ,
2604                                        MatGetDiagonal_MPIAIJ,
2605                                        MatDiagonalScale_MPIAIJ,
2606                                        MatNorm_MPIAIJ,
2607                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2608                                        MatAssemblyEnd_MPIAIJ,
2609                                        MatSetOption_MPIAIJ,
2610                                        MatZeroEntries_MPIAIJ,
2611                                 /*24*/ MatZeroRows_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                 /*29*/ MatSetUp_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        MatGetDiagonalBlock_MPIAIJ,
2620                                        0,
2621                                 /*34*/ MatDuplicate_MPIAIJ,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                 /*39*/ MatAXPY_MPIAIJ,
2627                                        MatCreateSubMatrices_MPIAIJ,
2628                                        MatIncreaseOverlap_MPIAIJ,
2629                                        MatGetValues_MPIAIJ,
2630                                        MatCopy_MPIAIJ,
2631                                 /*44*/ MatGetRowMax_MPIAIJ,
2632                                        MatScale_MPIAIJ,
2633                                        MatShift_MPIAIJ,
2634                                        MatDiagonalSet_MPIAIJ,
2635                                        MatZeroRowsColumns_MPIAIJ,
2636                                 /*49*/ MatSetRandom_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2642                                        0,
2643                                        MatSetUnfactored_MPIAIJ,
2644                                        MatPermute_MPIAIJ,
2645                                        0,
2646                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2647                                        MatDestroy_MPIAIJ,
2648                                        MatView_MPIAIJ,
2649                                        0,
2650                                        0,
2651                                 /*64*/ 0,
2652                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2657                                        MatGetRowMinAbs_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                 /*75*/ MatFDColoringApply_AIJ,
2663                                        MatSetFromOptions_MPIAIJ,
2664                                        0,
2665                                        0,
2666                                        MatFindZeroDiagonals_MPIAIJ,
2667                                 /*80*/ 0,
2668                                        0,
2669                                        0,
2670                                 /*83*/ MatLoad_MPIAIJ,
2671                                        MatIsSymmetric_MPIAIJ,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                 /*89*/ 0,
2677                                        0,
2678                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        MatBindToCPU_MPIAIJ,
2686                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2687                                        0,
2688                                        0,
2689                                        MatConjugate_MPIAIJ,
2690                                        0,
2691                                 /*104*/MatSetValuesRow_MPIAIJ,
2692                                        MatRealPart_MPIAIJ,
2693                                        MatImaginaryPart_MPIAIJ,
2694                                        0,
2695                                        0,
2696                                 /*109*/0,
2697                                        0,
2698                                        MatGetRowMin_MPIAIJ,
2699                                        0,
2700                                        MatMissingDiagonal_MPIAIJ,
2701                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2702                                        0,
2703                                        MatGetGhosts_MPIAIJ,
2704                                        0,
2705                                        0,
2706                                 /*119*/0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        MatGetMultiProcBlock_MPIAIJ,
2711                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2712                                        MatGetColumnNorms_MPIAIJ,
2713                                        MatInvertBlockDiagonal_MPIAIJ,
2714                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2715                                        MatCreateSubMatricesMPI_MPIAIJ,
2716                                 /*129*/0,
2717                                        0,
2718                                        0,
2719                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2720                                        0,
2721                                 /*134*/0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        0,
2726                                 /*139*/MatSetBlockSizes_MPIAIJ,
2727                                        0,
2728                                        0,
2729                                        MatFDColoringSetUp_MPIXAIJ,
2730                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2731                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2732                                 /*145*/0,
2733                                        0,
2734                                        0
2735 };
2736 
2737 /* ----------------------------------------------------------------------------------------*/
2738 
2739 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2751 {
2752   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2757   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2762 {
2763   Mat_MPIAIJ     *b;
2764   PetscErrorCode ierr;
2765   PetscMPIInt    size;
2766 
2767   PetscFunctionBegin;
2768   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2769   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2770   b = (Mat_MPIAIJ*)B->data;
2771 
2772 #if defined(PETSC_USE_CTABLE)
2773   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2774 #else
2775   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2776 #endif
2777   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2778   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2779   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2780 
2781   /* Because B will have been resized we simply destroy it and create a new one each time */
2782   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2783   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2784   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2785   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2786   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2787   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2788   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2789 
2790   if (!B->preallocated) {
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2793     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2794     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2795     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2796   }
2797 
2798   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2799   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2800   B->preallocated  = PETSC_TRUE;
2801   B->was_assembled = PETSC_FALSE;
2802   B->assembled     = PETSC_FALSE;
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2807 {
2808   Mat_MPIAIJ     *b;
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2813   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2814   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2815   b = (Mat_MPIAIJ*)B->data;
2816 
2817 #if defined(PETSC_USE_CTABLE)
2818   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2819 #else
2820   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2821 #endif
2822   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2823   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2824   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2825 
2826   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2827   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2828   B->preallocated  = PETSC_TRUE;
2829   B->was_assembled = PETSC_FALSE;
2830   B->assembled = PETSC_FALSE;
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2835 {
2836   Mat            mat;
2837   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2838   PetscErrorCode ierr;
2839 
2840   PetscFunctionBegin;
2841   *newmat = 0;
2842   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2843   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2844   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2845   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2846   a       = (Mat_MPIAIJ*)mat->data;
2847 
2848   mat->factortype   = matin->factortype;
2849   mat->assembled    = matin->assembled;
2850   mat->insertmode   = NOT_SET_VALUES;
2851   mat->preallocated = matin->preallocated;
2852 
2853   a->size         = oldmat->size;
2854   a->rank         = oldmat->rank;
2855   a->donotstash   = oldmat->donotstash;
2856   a->roworiented  = oldmat->roworiented;
2857   a->rowindices   = NULL;
2858   a->rowvalues    = NULL;
2859   a->getrowactive = PETSC_FALSE;
2860 
2861   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2862   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2863 
2864   if (oldmat->colmap) {
2865 #if defined(PETSC_USE_CTABLE)
2866     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2867 #else
2868     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2869     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2870     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2871 #endif
2872   } else a->colmap = NULL;
2873   if (oldmat->garray) {
2874     PetscInt len;
2875     len  = oldmat->B->cmap->n;
2876     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2877     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2878     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2879   } else a->garray = NULL;
2880 
2881   /* MatDuplicate() may be called with a non-assembled matrix since it
2882      only requires the matrix to be preallocated; this can happen, for
2883      example, inside DMCreateMatrix_Shell */
2884   if (oldmat->lvec) {
2885     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2886     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2887   }
2888   if (oldmat->Mvctx) {
2889     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2890     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2891   }
2892   if (oldmat->Mvctx_mpi1) {
2893     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2894     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2895   }
2896 
2897   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2898   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2899   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2900   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2901   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2902   *newmat = mat;
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2907 {
2908   PetscBool      isbinary, ishdf5;
2909   PetscErrorCode ierr;
2910 
2911   PetscFunctionBegin;
2912   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2913   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2914   /* force binary viewer to load .info file if it has not yet done so */
2915   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2916   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2917   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2918   if (isbinary) {
2919     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2920   } else if (ishdf5) {
2921 #if defined(PETSC_HAVE_HDF5)
2922     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2923 #else
2924     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2925 #endif
2926   } else {
2927     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2928   }
2929   PetscFunctionReturn(0);
2930 }
2931 
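/*
   A summary of the binary layout consumed below (describing only the reads in this
   routine, not a full format specification): a four-entry PETSC_INT header
   [MAT_FILE_CLASSID, M, N, nz], followed by the M row lengths, the nz column indices,
   and the nz PETSC_SCALAR values; each process reads its share of the row, column,
   and value data with PetscViewerBinaryReadAll().
*/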
2932 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2933 {
2934   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2935   PetscInt       *rowidxs,*colidxs;
2936   PetscScalar    *matvals;
2937   PetscErrorCode ierr;
2938 
2939   PetscFunctionBegin;
2940   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2941 
2942   /* read in matrix header */
2943   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2944   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2945   M  = header[1]; N = header[2]; nz = header[3];
2946   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2947   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2948   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2949 
2950   /* set block sizes from the viewer's .info file */
2951   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2952   /* set global sizes if not set already */
2953   if (mat->rmap->N < 0) mat->rmap->N = M;
2954   if (mat->cmap->N < 0) mat->cmap->N = N;
2955   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2957 
2958   /* check if the matrix sizes are correct */
2959   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2960   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2961 
2962   /* read in row lengths and build row indices */
2963   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2964   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2965   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2966   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2967   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2968   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2969   /* read in column indices and matrix values */
2970   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2971   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2972   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2973   /* store matrix indices and values */
2974   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2975   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2976   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 /* Not scalable because of ISAllGather() unless getting all columns. */
2981 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2982 {
2983   PetscErrorCode ierr;
2984   IS             iscol_local;
2985   PetscBool      isstride;
2986   PetscMPIInt    lisstride=0,gisstride;
2987 
2988   PetscFunctionBegin;
2989   /* check if we are grabbing all columns */
2990   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2991 
2992   if (isstride) {
2993     PetscInt  start,len,mstart,mlen;
2994     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2995     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2996     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2997     if (mstart == start && mlen-mstart == len) lisstride = 1;
2998   }
2999 
3000   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3001   if (gisstride) {
3002     PetscInt N;
3003     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3004     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3005     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3006     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3007   } else {
3008     PetscInt cbs;
3009     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3010     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3011     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3012   }
3013 
3014   *isseq = iscol_local;
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 /*
3019  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3020  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3021 
3022  Input Parameters:
3023    mat - matrix
3024    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3025            i.e., mat->rstart <= isrow[i] < mat->rend
3026    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3027            i.e., mat->cstart <= iscol[i] < mat->cend
3028  Output Parameters:
3029    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3030    iscol_o - sequential column index set for retrieving mat->B
3031    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3032  */
3033 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3034 {
3035   PetscErrorCode ierr;
3036   Vec            x,cmap;
3037   const PetscInt *is_idx;
3038   PetscScalar    *xarray,*cmaparray;
3039   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3040   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3041   Mat            B=a->B;
3042   Vec            lvec=a->lvec,lcmap;
3043   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3044   MPI_Comm       comm;
3045   VecScatter     Mvctx=a->Mvctx;
3046 
3047   PetscFunctionBegin;
3048   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3049   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3050 
3051   /* (1) iscol selects a subset of the columns of mat; embed it in the full-length vectors x and cmap, with unselected entries set to -1 */
3052   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3053   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3054   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3055   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3056 
3057   /* Get start indices */
3058   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3059   isstart -= ncols;
3060   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3061 
3062   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3063   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3064   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3065   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3066   for (i=0; i<ncols; i++) {
3067     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3068     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3069     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3070   }
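  /* An illustrative sketch with hypothetical numbers: for cstart=4, cend=8 and local
     iscol = {5,7}, the loop above sets xarray[1]=5, xarray[3]=7, cmaparray[1]=isstart
     and cmaparray[3]=isstart+1, leaving the remaining entries at -1; the scatters in
     step (2) then let each process recognize which of its off-diagonal columns are
     selected and where they land in the submatrix */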
3071   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3072   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3073   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3074 
3075   /* Get iscol_d */
3076   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3077   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3078   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3079 
3080   /* Get isrow_d */
3081   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3082   rstart = mat->rmap->rstart;
3083   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3084   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3085   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3086   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3087 
3088   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3089   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3090   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3091 
3092   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3093   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3094   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3095 
3096   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3097 
3098   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3099   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3100 
3101   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3102   /* off-process column indices */
3103   count = 0;
3104   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3105   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3106 
3107   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3108   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3109   for (i=0; i<Bn; i++) {
3110     if (PetscRealPart(xarray[i]) > -1.0) {
3111       idx[count]     = i;                   /* local column index in off-diagonal part B */
3112       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3113       count++;
3114     }
3115   }
3116   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3117   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3118 
3119   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3120   /* cannot ensure iscol_o has same blocksize as iscol! */
3121 
3122   ierr = PetscFree(idx);CHKERRQ(ierr);
3123   *garray = cmap1;
3124 
3125   ierr = VecDestroy(&x);CHKERRQ(ierr);
3126   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3127   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3128   PetscFunctionReturn(0);
3129 }
3130 
3131 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3132 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3133 {
3134   PetscErrorCode ierr;
3135   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3136   Mat            M = NULL;
3137   MPI_Comm       comm;
3138   IS             iscol_d,isrow_d,iscol_o;
3139   Mat            Asub = NULL,Bsub = NULL;
3140   PetscInt       n;
3141 
3142   PetscFunctionBegin;
3143   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3144 
3145   if (call == MAT_REUSE_MATRIX) {
3146     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3147     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3148     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3149 
3150     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3151     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3152 
3153     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3154     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3155 
3156     /* Update diagonal and off-diagonal portions of submat */
3157     asub = (Mat_MPIAIJ*)(*submat)->data;
3158     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3159     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3160     if (n) {
3161       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3162     }
3163     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165 
3166   } else { /* call == MAT_INITIAL_MATRIX */
3167     const PetscInt *garray;
3168     PetscInt        BsubN;
3169 
3170     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3171     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3172 
3173     /* Create local submatrices Asub and Bsub */
3174     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3175     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3176 
3177     /* Create submatrix M */
3178     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3179 
3180     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3181     asub = (Mat_MPIAIJ*)M->data;
3182 
3183     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3184     n = asub->B->cmap->N;
3185     if (BsubN > n) {
3186       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3187       const PetscInt *idx;
3188       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3189       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3190 
3191       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3192       j = 0;
3193       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3194       for (i=0; i<n; i++) {
3195         if (j >= BsubN) break;
3196         while (subgarray[i] > garray[j]) j++;
3197 
3198         if (subgarray[i] == garray[j]) {
3199           idx_new[i] = idx[j++];
3200         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3201       }
3202       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3203 
3204       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3205       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3206 
3207     } else if (BsubN < n) {
3208       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3209     }
3210 
3211     ierr = PetscFree(garray);CHKERRQ(ierr);
3212     *submat = M;
3213 
3214     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3215     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3216     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3217 
3218     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3219     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3220 
3221     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3222     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3223   }
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3228 {
3229   PetscErrorCode ierr;
3230   IS             iscol_local=NULL,isrow_d;
3231   PetscInt       csize;
3232   PetscInt       n,i,j,start,end;
3233   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3234   MPI_Comm       comm;
3235 
3236   PetscFunctionBegin;
3237   /* If isrow has same processor distribution as mat,
3238      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3239   if (call == MAT_REUSE_MATRIX) {
3240     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3241     if (isrow_d) {
3242       sameRowDist  = PETSC_TRUE;
3243       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3244     } else {
3245       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3246       if (iscol_local) {
3247         sameRowDist  = PETSC_TRUE;
3248         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3249       }
3250     }
3251   } else {
3252     /* Check if isrow has same processor distribution as mat */
3253     sameDist[0] = PETSC_FALSE;
3254     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3255     if (!n) {
3256       sameDist[0] = PETSC_TRUE;
3257     } else {
3258       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3259       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3260       if (i >= start && j < end) {
3261         sameDist[0] = PETSC_TRUE;
3262       }
3263     }
3264 
3265     /* Check if iscol has same processor distribution as mat */
3266     sameDist[1] = PETSC_FALSE;
3267     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3268     if (!n) {
3269       sameDist[1] = PETSC_TRUE;
3270     } else {
3271       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3272       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3273       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3274     }
3275 
3276     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3277     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3278     sameRowDist = tsameDist[0];
3279   }
3280 
3281   if (sameRowDist) {
3282     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3283       /* isrow and iscol have same processor distribution as mat */
3284       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3285       PetscFunctionReturn(0);
3286     } else { /* sameRowDist */
3287       /* isrow has same processor distribution as mat */
3288       if (call == MAT_INITIAL_MATRIX) {
3289         PetscBool sorted;
3290         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3291         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3292         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3293         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3294 
3295         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3296         if (sorted) {
3297           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3298           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3299           PetscFunctionReturn(0);
3300         }
3301       } else { /* call == MAT_REUSE_MATRIX */
3302         IS    iscol_sub;
3303         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3304         if (iscol_sub) {
3305           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3306           PetscFunctionReturn(0);
3307         }
3308       }
3309     }
3310   }
3311 
3312   /* General case: iscol -> iscol_local which has global size of iscol */
3313   if (call == MAT_REUSE_MATRIX) {
3314     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3315     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3316   } else {
3317     if (!iscol_local) {
3318       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3319     }
3320   }
3321 
3322   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3323   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3324 
3325   if (call == MAT_INITIAL_MATRIX) {
3326     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3327     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3328   }
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 /*@C
3333      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3334          and "off-diagonal" part of the matrix in CSR format.
3335 
3336    Collective
3337 
3338    Input Parameters:
3339 +  comm - MPI communicator
3340 .  A - "diagonal" portion of matrix
3341 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3342 -  garray - global index of B columns
3343 
3344    Output Parameter:
3345 .   mat - the matrix, with input A as its local diagonal matrix

3346    Level: advanced
3347 
3348    Notes:
3349        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3350        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
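
       A minimal call sketch (hedged; Aloc and Bloc are assumed to be SeqAIJ matrices with
       the same number of local rows, and garray[] to hold, in increasing order, the global
       column index of each column of Bloc):
$         Mat C;
$         ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
       Afterwards only C may be used; Aloc is owned by C and Bloc has been destroyed.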
3351 
3352 .seealso: MatCreateMPIAIJWithSplitArrays()
3353 @*/
3354 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3355 {
3356   PetscErrorCode ierr;
3357   Mat_MPIAIJ     *maij;
3358   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3359   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3360   PetscScalar    *oa=b->a;
3361   Mat            Bnew;
3362   PetscInt       m,n,N;
3363 
3364   PetscFunctionBegin;
3365   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3366   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3367   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3368   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3369   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3370   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3371 
3372   /* Get global columns of mat */
3373   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3374 
3375   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3376   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3377   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3378   maij = (Mat_MPIAIJ*)(*mat)->data;
3379 
3380   (*mat)->preallocated = PETSC_TRUE;
3381 
3382   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3383   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3384 
3385   /* Set A as diagonal portion of *mat */
3386   maij->A = A;
3387 
3388   nz = oi[m];
3389   for (i=0; i<nz; i++) {
3390     col   = oj[i];
3391     oj[i] = garray[col];
3392   }
3393 
3394    /* Set Bnew as off-diagonal portion of *mat */
3395   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3396   bnew        = (Mat_SeqAIJ*)Bnew->data;
3397   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3398   maij->B     = Bnew;
3399 
3400   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3401 
3402   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3403   b->free_a       = PETSC_FALSE;
3404   b->free_ij      = PETSC_FALSE;
3405   ierr = MatDestroy(&B);CHKERRQ(ierr);
3406 
3407   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3408   bnew->free_a       = PETSC_TRUE;
3409   bnew->free_ij      = PETSC_TRUE;
3410 
3411   /* condense columns of maij->B */
3412   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3413   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3414   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3415   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3416   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3417   PetscFunctionReturn(0);
3418 }
3419 
3420 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3421 
3422 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3423 {
3424   PetscErrorCode ierr;
3425   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3426   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3427   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3428   Mat            M,Msub,B=a->B;
3429   MatScalar      *aa;
3430   Mat_SeqAIJ     *aij;
3431   PetscInt       *garray = a->garray,*colsub,Ncols;
3432   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3433   IS             iscol_sub,iscmap;
3434   const PetscInt *is_idx,*cmap;
3435   PetscBool      allcolumns=PETSC_FALSE;
3436   MPI_Comm       comm;
3437 
3438   PetscFunctionBegin;
3439   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3440 
3441   if (call == MAT_REUSE_MATRIX) {
3442     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3443     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3444     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3445 
3446     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3447     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3448 
3449     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3450     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3451 
3452     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3453 
3454   } else { /* call == MAT_INITIAL_MATRIX */
3455     PetscBool flg;
3456 
3457     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3458     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3459 
3460     /* (1) iscol -> nonscalable iscol_local */
3461     /* Check for special case: each processor gets entire matrix columns */
3462     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3463     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3464     if (allcolumns) {
3465       iscol_sub = iscol_local;
3466       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3467       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3468 
3469     } else {
3470       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local be sorted; it can have duplicate indices */
3471       PetscInt *idx,*cmap1,k;
3472       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3473       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3474       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3475       count = 0;
3476       k     = 0;
3477       for (i=0; i<Ncols; i++) {
3478         j = is_idx[i];
3479         if (j >= cstart && j < cend) {
3480           /* diagonal part of mat */
3481           idx[count]     = j;
3482           cmap1[count++] = i; /* column index in submat */
3483         } else if (Bn) {
3484           /* off-diagonal part of mat */
3485           if (j == garray[k]) {
3486             idx[count]     = j;
3487             cmap1[count++] = i;  /* column index in submat */
3488           } else if (j > garray[k]) {
3489             while (j > garray[k] && k < Bn-1) k++;
3490             if (j == garray[k]) {
3491               idx[count]     = j;
3492               cmap1[count++] = i; /* column index in submat */
3493             }
3494           }
3495         }
3496       }
3497       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3498 
3499       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3500       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3501       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3502 
3503       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3504     }
3505 
3506     /* (3) Create sequential Msub */
3507     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3508   }
3509 
3510   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3511   aij  = (Mat_SeqAIJ*)(Msub)->data;
3512   ii   = aij->i;
3513   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3514 
3515   /*
3516       m - number of local rows
3517       Ncols - number of columns (same on all processors)
3518       rstart - first row in new global matrix generated
3519   */
3520   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3521 
3522   if (call == MAT_INITIAL_MATRIX) {
3523     /* (4) Create parallel newmat */
3524     PetscMPIInt    rank,size;
3525     PetscInt       csize;
3526 
3527     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3528     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3529 
3530     /*
3531         Determine the number of non-zeros in the diagonal and off-diagonal
3532         portions of the matrix in order to do correct preallocation
3533     */
3534 
3535     /* first get start and end of "diagonal" columns */
3536     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3537     if (csize == PETSC_DECIDE) {
3538       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3539       if (mglobal == Ncols) { /* square matrix */
3540         nlocal = m;
3541       } else {
3542         nlocal = Ncols/size + ((Ncols % size) > rank);
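        /* e.g. (illustrative): Ncols=10, size=4 gives nlocal = 3,3,2,2 on ranks 0..3 */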
3543       }
3544     } else {
3545       nlocal = csize;
3546     }
3547     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3548     rstart = rend - nlocal;
3549     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3550 
3551     /* next, compute all the lengths */
3552     jj    = aij->j;
3553     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3554     olens = dlens + m;
3555     for (i=0; i<m; i++) {
3556       jend = ii[i+1] - ii[i];
3557       olen = 0;
3558       dlen = 0;
3559       for (j=0; j<jend; j++) {
3560         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3561         else dlen++;
3562         jj++;
3563       }
3564       olens[i] = olen;
3565       dlens[i] = dlen;
3566     }
3567 
3568     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3569     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3570 
3571     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3572     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3573     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3574     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3575     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3576     ierr = PetscFree(dlens);CHKERRQ(ierr);
3577 
3578   } else { /* call == MAT_REUSE_MATRIX */
3579     M    = *newmat;
3580     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3581     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3582     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3583     /*
3584          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3585        rather than the slower MatSetValues().
3586     */
3587     M->was_assembled = PETSC_TRUE;
3588     M->assembled     = PETSC_FALSE;
3589   }
3590 
3591   /* (5) Set values of Msub to *newmat */
3592   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3593   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3594 
3595   jj   = aij->j;
3596   aa   = aij->a;
3597   for (i=0; i<m; i++) {
3598     row = rstart + i;
3599     nz  = ii[i+1] - ii[i];
3600     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3601     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3602     jj += nz; aa += nz;
3603   }
3604   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3605 
3606   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3608 
3609   ierr = PetscFree(colsub);CHKERRQ(ierr);
3610 
3611   /* save Msub, iscol_sub and iscmap used in processor for next request */
3612   if (call ==  MAT_INITIAL_MATRIX) {
3613     *newmat = M;
3614     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3615     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3616 
3617     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3618     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3619 
3620     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3621     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3622 
3623     if (iscol_local) {
3624       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3625       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3626     }
3627   }
3628   PetscFunctionReturn(0);
3629 }
3630 
3631 /*
3632     Not great since it makes two copies of the submatrix: first a SeqAIJ
3633   on each process, and then the end result by concatenating the local matrices.
3634   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3635 
3636   Note: This requires a sequential iscol with all indices.
3637 */
3638 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3639 {
3640   PetscErrorCode ierr;
3641   PetscMPIInt    rank,size;
3642   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3643   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3644   Mat            M,Mreuse;
3645   MatScalar      *aa,*vwork;
3646   MPI_Comm       comm;
3647   Mat_SeqAIJ     *aij;
3648   PetscBool      colflag,allcolumns=PETSC_FALSE;
3649 
3650   PetscFunctionBegin;
3651   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3652   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3654 
3655   /* Check for special case: each processor gets entire matrix columns */
3656   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3657   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3658   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3659 
3660   if (call ==  MAT_REUSE_MATRIX) {
3661     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3662     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3663     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3664   } else {
3665     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3666   }
3667 
3668   /*
3669       m - number of local rows
3670       n - number of columns (same on all processors)
3671       rstart - first row in new global matrix generated
3672   */
3673   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3674   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3675   if (call == MAT_INITIAL_MATRIX) {
3676     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3677     ii  = aij->i;
3678     jj  = aij->j;
3679 
3680     /*
3681         Determine the number of non-zeros in the diagonal and off-diagonal
3682         portions of the matrix in order to do correct preallocation
3683     */
3684 
3685     /* first get start and end of "diagonal" columns */
3686     if (csize == PETSC_DECIDE) {
3687       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3688       if (mglobal == n) { /* square matrix */
3689         nlocal = m;
3690       } else {
3691         nlocal = n/size + ((n % size) > rank);
3692       }
3693     } else {
3694       nlocal = csize;
3695     }
3696     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3697     rstart = rend - nlocal;
3698     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3699 
3700     /* next, compute all the lengths */
3701     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3702     olens = dlens + m;
3703     for (i=0; i<m; i++) {
3704       jend = ii[i+1] - ii[i];
3705       olen = 0;
3706       dlen = 0;
3707       for (j=0; j<jend; j++) {
3708         if (*jj < rstart || *jj >= rend) olen++;
3709         else dlen++;
3710         jj++;
3711       }
3712       olens[i] = olen;
3713       dlens[i] = dlen;
3714     }
3715     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3716     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3717     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3718     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3719     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3720     ierr = PetscFree(dlens);CHKERRQ(ierr);
3721   } else {
3722     PetscInt ml,nl;
3723 
3724     M    = *newmat;
3725     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3726     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3727     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3728     /*
3729          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3730        rather than the slower MatSetValues().
3731     */
3732     M->was_assembled = PETSC_TRUE;
3733     M->assembled     = PETSC_FALSE;
3734   }
3735   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3736   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3737   ii   = aij->i;
3738   jj   = aij->j;
3739   aa   = aij->a;
3740   for (i=0; i<m; i++) {
3741     row   = rstart + i;
3742     nz    = ii[i+1] - ii[i];
3743     cwork = jj;     jj += nz;
3744     vwork = aa;     aa += nz;
3745     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3746   }
3747 
3748   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3749   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3750   *newmat = M;
3751 
3752   /* save submatrix used in processor for next request */
3753   if (call ==  MAT_INITIAL_MATRIX) {
3754     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3755     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3756   }
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3761 {
3762   PetscInt       m,cstart, cend,j,nnz,i,d;
3763   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3764   const PetscInt *JJ;
3765   PetscErrorCode ierr;
3766   PetscBool      nooffprocentries;
3767 
3768   PetscFunctionBegin;
3769   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3770 
3771   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3772   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3773   m      = B->rmap->n;
3774   cstart = B->cmap->rstart;
3775   cend   = B->cmap->rend;
3776   rstart = B->rmap->rstart;
3777 
3778   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3779 
3780   if (PetscDefined(USE_DEBUG)) {
3781     for (i=0; i<m; i++) {
3782       nnz = Ii[i+1]- Ii[i];
3783       JJ  = J + Ii[i];
3784       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3785       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3786       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3787     }
3788   }
3789 
3790   for (i=0; i<m; i++) {
3791     nnz     = Ii[i+1]- Ii[i];
3792     JJ      = J + Ii[i];
3793     nnz_max = PetscMax(nnz_max,nnz);
3794     d       = 0;
3795     for (j=0; j<nnz; j++) {
3796       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3797     }
3798     d_nnz[i] = d;
3799     o_nnz[i] = nnz - d;
3800   }
3801   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3802   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3803 
3804   for (i=0; i<m; i++) {
3805     ii   = i + rstart;
3806     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3807   }
3808   nooffprocentries    = B->nooffprocentries;
3809   B->nooffprocentries = PETSC_TRUE;
3810   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3812   B->nooffprocentries = nooffprocentries;
3813 
3814   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3815   PetscFunctionReturn(0);
3816 }
3817 
3818 /*@
3819    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3820    (the default parallel PETSc format).
3821 
3822    Collective
3823 
3824    Input Parameters:
3825 +  B - the matrix
3826 .  i - the indices into j for the start of each local row (starts with zero)
3827 .  j - the column indices for each local row (starts with zero)
3828 -  v - optional values in the matrix
3829 
3830    Level: developer
3831 
3832    Notes:
3833        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3834      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3835      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3836 
3837        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3838 
3839        The format used for the sparse matrix input is equivalent to a
3840     row-major ordering; i.e., for the following matrix, the input data expected is
3841     as shown
3842 
3843 $        1 0 0
3844 $        2 0 3     P0
3845 $       -------
3846 $        4 5 6     P1
3847 $
3848 $     Process0 [P0]: rows_owned=[0,1]
3849 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3850 $        j =  {0,0,2}  [size = 3]
3851 $        v =  {1,2,3}  [size = 3]
3852 $
3853 $     Process1 [P1]: rows_owned=[2]
3854 $        i =  {0,3}    [size = nrow+1  = 1+1]
3855 $        j =  {0,1,2}  [size = 3]
3856 $        v =  {4,5,6}  [size = 3]
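
   A minimal usage sketch under the same layout (each process passes only its local i, j, and v; the variable names here are illustrative):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);  /* copies i,j,v and assembles B */
.ve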
3857 
3858 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3859           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3860 @*/
3861 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3862 {
3863   PetscErrorCode ierr;
3864 
3865   PetscFunctionBegin;
3866   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3867   PetscFunctionReturn(0);
3868 }
3869 
3870 /*@C
3871    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3872    (the default parallel PETSc format).  For good matrix assembly performance
3873    the user should preallocate the matrix storage by setting the parameters
3874    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3875    performance can be increased by more than a factor of 50.
3876 
3877    Collective
3878 
3879    Input Parameters:
3880 +  B - the matrix
3881 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3882            (same value is used for all local rows)
3883 .  d_nnz - array containing the number of nonzeros in the various rows of the
3884            DIAGONAL portion of the local submatrix (possibly different for each row)
3885            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3886            The size of this array is equal to the number of local rows, i.e 'm'.
3887            For matrices that will be factored, you must leave room for (and set)
3888            the diagonal entry even if it is zero.
3889 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3890            submatrix (same value is used for all local rows).
3891 -  o_nnz - array containing the number of nonzeros in the various rows of the
3892            OFF-DIAGONAL portion of the local submatrix (possibly different for
3893            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3894            structure. The size of this array is equal to the number
3895            of local rows, i.e 'm'.
3896 
3897    If the *_nnz parameter is given then the *_nz parameter is ignored.
3898 
3899    The AIJ format (also called the Yale sparse matrix format or
3900    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3901    storage.  The stored row and column indices begin with zero.
3902    See Users-Manual: ch_mat for details.
3903 
3904    The parallel matrix is partitioned such that the first m0 rows belong to
3905    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3906    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3907 
3908    The DIAGONAL portion of the local submatrix of a processor can be defined
3909    as the submatrix obtained by extracting the part corresponding to
3910    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3911    first row that belongs to the processor, r2 is the last row belonging to
3912    this processor, and c1-c2 is the range of indices of the local part of a
3913    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3914    common case of a square matrix, the row and column ranges are the same and
3915    the DIAGONAL part is also square. The remaining portion of the local
3916    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3917 
3918    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3919 
3920    You can call MatGetInfo() to get information on how effective the preallocation was;
3921    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3922    You can also run with the option -info and look for messages with the string
3923    malloc in them to see if additional memory allocation was needed.
3924 
3925    Example usage:
3926 
3927    Consider the following 8x8 matrix with 34 non-zero values that is
3928    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3929    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3930    as follows:
3931 
3932 .vb
3933             1  2  0  |  0  3  0  |  0  4
3934     Proc0   0  5  6  |  7  0  0  |  8  0
3935             9  0 10  | 11  0  0  | 12  0
3936     -------------------------------------
3937            13  0 14  | 15 16 17  |  0  0
3938     Proc1   0 18  0  | 19 20 21  |  0  0
3939             0  0  0  | 22 23  0  | 24  0
3940     -------------------------------------
3941     Proc2  25 26 27  |  0  0 28  | 29  0
3942            30  0  0  | 31 32 33  |  0 34
3943 .ve
3944 
3945    This can be represented as a collection of submatrices as:
3946 
3947 .vb
3948       A B C
3949       D E F
3950       G H I
3951 .ve
3952 
3953    Where the submatrices A,B,C are owned by proc0, D,E,F are
3954    owned by proc1, G,H,I are owned by proc2.
3955 
3956    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3957    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3958    The 'M','N' parameters are 8,8, and have the same values on all procs.
3959 
3960    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3961    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3962    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3963    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3964    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3965    matrix, and [DF] as another SeqAIJ matrix.
3966 
3967    When d_nz, o_nz parameters are specified, d_nz storage elements are
3968    allocated for every row of the local diagonal submatrix, and o_nz
3969    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3970    One way to choose d_nz and o_nz is to use the max nonzeros per local
3971    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3972    In this case, the values of d_nz,o_nz are:
3973 .vb
3974      proc0 : dnz = 2, o_nz = 2
3975      proc1 : dnz = 3, o_nz = 2
3976      proc2 : dnz = 1, o_nz = 4
3977 .ve
3978    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3979    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3980    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3981    34 values.
3982 
3983    When d_nnz, o_nnz parameters are specified, the storage is specified
3984    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3985    In the above case the values for d_nnz,o_nnz are:
3986 .vb
3987      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3988      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3989      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3990 .ve
3991    Here the space allocated is the sum of all the above values, i.e., 34, and
3992    hence the preallocation is perfect.
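
   A minimal sketch of the per-row variant on proc0 of the example above (the counts are copied from the table; B is assumed already created with local size 3x3 and type MATMPIAIJ):

.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve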
3993 
3994    Level: intermediate
3995 
3996 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3997           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3998 @*/
3999 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4000 {
4001   PetscErrorCode ierr;
4002 
4003   PetscFunctionBegin;
4004   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4005   PetscValidType(B,1);
4006   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 /*@
4011      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4012          CSR format.
4013 
4014    Collective
4015 
4016    Input Parameters:
4017 +  comm - MPI communicator
4018 .  m - number of local rows (Cannot be PETSC_DECIDE)
4019 .  n - This value should be the same as the local size used in creating the
4020        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4021        calculated if N is given). For square matrices n is almost always m.
4022 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4023 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4024 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4025 .   j - column indices
4026 -   a - matrix values
4027 
4028    Output Parameter:
4029 .   mat - the matrix
4030 
4031    Level: intermediate
4032 
4033    Notes:
4034        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4035      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4036      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4037 
4038        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4039 
4040        The format used for the sparse matrix input is equivalent to a
4041     row-major ordering; i.e., for the following matrix, the input data expected is
4042     as shown
4043 
4044        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()
4045 
4046 $        1 0 0
4047 $        2 0 3     P0
4048 $       -------
4049 $        4 5 6     P1
4050 $
4051 $     Process0 [P0]: rows_owned=[0,1]
4052 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4053 $        j =  {0,0,2}  [size = 3]
4054 $        v =  {1,2,3}  [size = 3]
4055 $
4056 $     Process1 [P1]: rows_owned=[2]
4057 $        i =  {0,3}    [size = nrow+1  = 1+1]
4058 $        j =  {0,1,2}  [size = 3]
4059 $        v =  {4,5,6}  [size = 3]
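
   A minimal sketch for process 0 of the example above (the arrays are written out literally here; in practice they usually come from an existing CSR data structure):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve

   Process 1 makes the matching collective call with m = 1 and its own i, j, and v.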
4060 
4061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4062           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4063 @*/
4064 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4065 {
4066   PetscErrorCode ierr;
4067 
4068   PetscFunctionBegin;
4069   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4070   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4071   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4072   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4073   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4074   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4075   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4076   PetscFunctionReturn(0);
4077 }
4078 
4079 /*@
4080      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4081          CSR format. Only the numerical values are updated; the sparsity pattern (row and column index arrays) must be identical to that used to create the matrix.
4082 
4083    Collective
4084 
4085    Input Parameters:
4086 +  mat - the matrix
4087 .  m - number of local rows (Cannot be PETSC_DECIDE)
4088 .  n - This value should be the same as the local size used in creating the
4089        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4090        calculated if N is given). For square matrices n is almost always m.
4091 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4092 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4093 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4094 .  J - column indices
4095 -  v - matrix values
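
   Notes:
   A typical cycle, sketched below, creates the matrix once and then refreshes only the numerical values whenever the same sparsity pattern is refilled (Ii, J, and v are the caller's CSR arrays):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&A);
     /* ... recompute only the entries of v ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
.ve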
4096 
4097    Level: intermediate
4098 
4099 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4100           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4101 @*/
4102 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4103 {
4104   PetscErrorCode ierr;
4105   PetscInt       cstart,nnz,i,j;
4106   PetscInt       *ld;
4107   PetscBool      nooffprocentries;
4108   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4109   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4110   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4111   const PetscInt *Adi = Ad->i;
4112   PetscInt       ldi,Iii,md;
4113 
4114   PetscFunctionBegin;
4115   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4116   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4117   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4118   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4119 
4120   cstart = mat->cmap->rstart;
4121   if (!Aij->ld) {
4122     /* count number of entries below block diagonal */
4123     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4124     Aij->ld = ld;
4125     for (i=0; i<m; i++) {
4126       nnz  = Ii[i+1]- Ii[i];
4127       j     = 0;
4128       while (j < nnz && J[j] < cstart) j++; /* check j < nnz before indexing J to avoid reading past the end of the row */
4129       J    += nnz;
4130       ld[i] = j;
4131     }
4132   } else {
4133     ld = Aij->ld;
4134   }
4135 
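  /* For each local row, the values in v are laid out as three consecutive segments:
     [off-diagonal entries with column < cstart | diagonal-block entries | remaining off-diagonal entries];
     ld[] holds the length of the first segment, so the copies below scatter v into the
     diagonal (ad) and off-diagonal (ao) storage respectively */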
4136   for (i=0; i<m; i++) {
4137     nnz  = Ii[i+1]- Ii[i];
4138     Iii  = Ii[i];
4139     ldi  = ld[i];
4140     md   = Adi[i+1]-Adi[i];
4141     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4142     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4143     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4144     ad  += md;
4145     ao  += nnz - md;
4146   }
4147   nooffprocentries      = mat->nooffprocentries;
4148   mat->nooffprocentries = PETSC_TRUE;
4149   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4150   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4151   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4152   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4153   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4154   mat->nooffprocentries = nooffprocentries;
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 /*@C
4159    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4160    (the default parallel PETSc format).  For good matrix assembly performance
4161    the user should preallocate the matrix storage by setting the parameters
4162    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4163    performance can be increased by more than a factor of 50.
4164 
4165    Collective
4166 
4167    Input Parameters:
4168 +  comm - MPI communicator
4169 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4170            This value should be the same as the local size used in creating the
4171            y vector for the matrix-vector product y = Ax.
4172 .  n - This value should be the same as the local size used in creating the
4173        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4174        calculated if N is given). For square matrices n is almost always m.
4175 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4176 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4177 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4178            (same value is used for all local rows)
4179 .  d_nnz - array containing the number of nonzeros in the various rows of the
4180            DIAGONAL portion of the local submatrix (possibly different for each row)
4181            or NULL, if d_nz is used to specify the nonzero structure.
4182            The size of this array is equal to the number of local rows, i.e 'm'.
4183 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4184            submatrix (same value is used for all local rows).
4185 -  o_nnz - array containing the number of nonzeros in the various rows of the
4186            OFF-DIAGONAL portion of the local submatrix (possibly different for
4187            each row) or NULL, if o_nz is used to specify the nonzero
4188            structure. The size of this array is equal to the number
4189            of local rows, i.e 'm'.
4190 
4191    Output Parameter:
4192 .  A - the matrix
4193 
4194    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4195    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4196    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4197 
4198    Notes:
4199    If the *_nnz parameter is given then the *_nz parameter is ignored
4200 
4201    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4202    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4203    storage requirements for this matrix.
4204 
4205    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4206    processor then it must be used on all processors that share the object for
4207    that argument.
4208 
4209    The user MUST specify either the local or global matrix dimensions
4210    (possibly both).
4211 
4212    The parallel matrix is partitioned across processors such that the
4213    first m0 rows belong to process 0, the next m1 rows belong to
4214    process 1, the next m2 rows belong to process 2, etc., where
4215    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4216    values corresponding to an [m x N] submatrix.
4217 
4218    The columns are logically partitioned with the n0 columns belonging
4219    to the 0th partition, the next n1 columns belonging to the next
4220    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4221 
4222    The DIAGONAL portion of the local submatrix on any given processor
4223    is the submatrix corresponding to the rows and columns m,n
4224    corresponding to the given processor. i.e diagonal matrix on
4225    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4226    etc. The remaining portion of the local submatrix [m x (N-n)]
4227    constitute the OFF-DIAGONAL portion. The example below better
4228    illustrates this concept.
4229 
4230    For a square global matrix we define each processor's diagonal portion
4231    to be its local rows and the corresponding columns (a square submatrix);
4232    each processor's off-diagonal portion encompasses the remainder of the
4233    local matrix (a rectangular submatrix).
4234 
4235    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4236 
4237    When calling this routine with a single process communicator, a matrix of
4238    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4239    type of communicator, use the construction mechanism
4240 .vb
4241      MatCreate(...,&A);
4242      MatSetType(A,MATMPIAIJ);
4243      MatSetSizes(A, m,n,M,N);
4244      MatMPIAIJSetPreallocation(A,...);
4245 .ve
4248 
4249    By default, this format uses inodes (identical nodes) when possible.
4250    We search for consecutive rows with the same nonzero structure, thereby
4251    reusing matrix information to achieve increased efficiency.
4252 
4253    Options Database Keys:
4254 +  -mat_no_inode  - Do not use inodes
4255 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4256 
4259    Example usage:
4260 
4261    Consider the following 8x8 matrix with 34 non-zero values that is
4262    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4263    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4264    as follows
4265 
4266 .vb
4267             1  2  0  |  0  3  0  |  0  4
4268     Proc0   0  5  6  |  7  0  0  |  8  0
4269             9  0 10  | 11  0  0  | 12  0
4270     -------------------------------------
4271            13  0 14  | 15 16 17  |  0  0
4272     Proc1   0 18  0  | 19 20 21  |  0  0
4273             0  0  0  | 22 23  0  | 24  0
4274     -------------------------------------
4275     Proc2  25 26 27  |  0  0 28  | 29  0
4276            30  0  0  | 31 32 33  |  0 34
4277 .ve
4278 
4279    This can be represented as a collection of submatrices as
4280 
4281 .vb
4282       A B C
4283       D E F
4284       G H I
4285 .ve
4286 
4287    Where the submatrices A,B,C are owned by proc0, D,E,F are
4288    owned by proc1, G,H,I are owned by proc2.
4289 
4290    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4291    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4292    The 'M','N' parameters are 8,8, and have the same values on all procs.
4293 
4294    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4295    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4296    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4297    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4298    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4299    matrix, and [DF] as another SeqAIJ matrix.
4300 
4301    When d_nz, o_nz parameters are specified, d_nz storage elements are
4302    allocated for every row of the local diagonal submatrix, and o_nz
4303    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4304    One way to choose d_nz and o_nz is to use the max nonzeros per local
4305    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4306    In this case, the values of d_nz,o_nz are
4307 .vb
4308      proc0 : dnz = 2, o_nz = 2
4309      proc1 : dnz = 3, o_nz = 2
4310      proc2 : dnz = 1, o_nz = 4
4311 .ve
4312    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4313    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4314    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4315    34 values.
4316 
4317    When d_nnz, o_nnz parameters are specified, the storage is specified
4318    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4319    In the above case the values for d_nnz,o_nnz are
4320 .vb
4321      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4322      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4323      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4324 .ve
4325    Here the space allocated is the sum of all the above values, i.e., 34, and
4326    hence the preallocation is perfect.
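
   A minimal creation sketch for proc0 of the example above, using the per-row counts (the other processes make the matching collective call with their own local sizes and arrays):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve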
4327 
4328    Level: intermediate
4329 
4330 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4331           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4332 @*/
4333 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4334 {
4335   PetscErrorCode ierr;
4336   PetscMPIInt    size;
4337 
4338   PetscFunctionBegin;
4339   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4340   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4341   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4342   if (size > 1) {
4343     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4344     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4345   } else {
4346     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4347     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4348   }
4349   PetscFunctionReturn(0);
4350 }
4351 
4352 /*@C
4353   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4354 
4355   Not collective
4356 
4357   Input Parameter:
4358 . A - The MPIAIJ matrix
4359 
4360   Output Parameters:
4361 + Ad - The local diagonal block as a SeqAIJ matrix
4362 . Ao - The local off-diagonal block as a SeqAIJ matrix
4363 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4364 
4365   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4366   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4367   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4368   local column numbers to global column numbers in the original matrix.
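
  A minimal sketch of recovering global column indices for the off-diagonal block (Ad is ignored here by passing NULL; the returned objects are internal to A, so the caller does not destroy them):

.vb
     Mat            Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,NULL,&Ao,&colmap);
     /* local column c of Ao corresponds to global column colmap[c] of A */
.ve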
4369 
4370   Level: intermediate
4371 
4372 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4373 @*/
4374 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4375 {
4376   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4377   PetscBool      flg;
4378   PetscErrorCode ierr;
4379 
4380   PetscFunctionBegin;
4381   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4382   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4383   if (Ad)     *Ad     = a->A;
4384   if (Ao)     *Ao     = a->B;
4385   if (colmap) *colmap = a->garray;
4386   PetscFunctionReturn(0);
4387 }
4388 
4389 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4390 {
4391   PetscErrorCode ierr;
4392   PetscInt       m,N,i,rstart,nnz,Ii;
4393   PetscInt       *indx;
4394   PetscScalar    *values;
4395 
4396   PetscFunctionBegin;
4397   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4398   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4399     PetscInt       *dnz,*onz,sum,bs,cbs;
4400 
4401     if (n == PETSC_DECIDE) {
4402       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4403     }
4404     /* Check sum(n) = N */
4405     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4406     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4407 
4408     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4409     rstart -= m;
4410 
4411     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4412     for (i=0; i<m; i++) {
4413       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4414       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4415       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4416     }
4417 
4418     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4419     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4420     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4421     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4422     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4423     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4424     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4425     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4426   }
4427 
4428   /* numeric phase */
4429   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4430   for (i=0; i<m; i++) {
4431     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4432     Ii   = i + rstart;
4433     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4434     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4435   }
4436   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4437   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4438   PetscFunctionReturn(0);
4439 }
4440 
4441 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4442 {
4443   PetscErrorCode    ierr;
4444   PetscMPIInt       rank;
4445   PetscInt          m,N,i,rstart,nnz;
4446   size_t            len;
4447   const PetscInt    *indx;
4448   PetscViewer       out;
4449   char              *name;
4450   Mat               B;
4451   const PetscScalar *values;
4452 
4453   PetscFunctionBegin;
4454   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4455   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4456   /* Should this be the type of the diagonal block of A? */
4457   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4458   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4459   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4460   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4461   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4462   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4463   for (i=0; i<m; i++) {
4464     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4465     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4466     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4467   }
4468   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4469   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4470 
4471   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4472   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4473   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4474   sprintf(name,"%s.%d",outfile,rank);
4475   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4476   ierr = PetscFree(name);CHKERRQ(ierr);
4477   ierr = MatView(B,out);CHKERRQ(ierr);
4478   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4479   ierr = MatDestroy(&B);CHKERRQ(ierr);
4480   PetscFunctionReturn(0);
4481 }
4482 
4483 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4484 {
4485   PetscErrorCode      ierr;
4486   Mat_Merge_SeqsToMPI *merge;
4487   PetscContainer      container;
4488 
4489   PetscFunctionBegin;
4490   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4491   if (container) {
4492     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4505     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4506     ierr = PetscFree(merge);CHKERRQ(ierr);
4507     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4508   }
4509   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #include <../src/mat/utils/freespace.h>
4514 #include <petscbt.h>
4515 
4516 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4517 {
4518   PetscErrorCode      ierr;
4519   MPI_Comm            comm;
4520   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4521   PetscMPIInt         size,rank,taga,*len_s;
4522   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4523   PetscInt            proc,m;
4524   PetscInt            **buf_ri,**buf_rj;
4525   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4526   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4527   MPI_Request         *s_waits,*r_waits;
4528   MPI_Status          *status;
4529   MatScalar           *aa=a->a;
4530   MatScalar           **abuf_r,*ba_i;
4531   Mat_Merge_SeqsToMPI *merge;
4532   PetscContainer      container;
4533 
4534   PetscFunctionBegin;
4535   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4536   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4537 
4538   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4539   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4540 
4541   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4542   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4543 
4544   bi     = merge->bi;
4545   bj     = merge->bj;
4546   buf_ri = merge->buf_ri;
4547   buf_rj = merge->buf_rj;
4548 
4549   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4550   owners = merge->rowmap->range;
4551   len_s  = merge->len_s;
4552 
4553   /* send and recv matrix values */
4554   /*-----------------------------*/
4555   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4556   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4557 
4558   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4559   for (proc=0,k=0; proc<size; proc++) {
4560     if (!len_s[proc]) continue;
4561     i    = owners[proc];
4562     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4563     k++;
4564   }
4565 
4566   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4567   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4568   ierr = PetscFree(status);CHKERRQ(ierr);
4569 
4570   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4571   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4572 
4573   /* insert mat values of mpimat */
4574   /*----------------------------*/
4575   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4576   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4577 
4578   for (k=0; k<merge->nrecv; k++) {
4579     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4580     nrows       = *(buf_ri_k[k]);
4581     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4582     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4583   }
4584 
4585   /* set values of ba */
4586   m = merge->rowmap->n;
4587   for (i=0; i<m; i++) {
4588     arow = owners[rank] + i;
4589     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4590     bnzi = bi[i+1] - bi[i];
4591     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4592 
4593     /* add local non-zero vals of this proc's seqmat into ba */
4594     anzi   = ai[arow+1] - ai[arow];
4595     aj     = a->j + ai[arow];
4596     aa     = a->a + ai[arow];
4597     nextaj = 0;
4598     for (j=0; nextaj<anzi; j++) {
4599       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4600         ba_i[j] += aa[nextaj++];
4601       }
4602     }
4603 
4604     /* add received vals into ba */
4605     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4606       /* i-th row */
4607       if (i == *nextrow[k]) {
4608         anzi   = *(nextai[k]+1) - *nextai[k];
4609         aj     = buf_rj[k] + *(nextai[k]);
4610         aa     = abuf_r[k] + *(nextai[k]);
4611         nextaj = 0;
4612         for (j=0; nextaj<anzi; j++) {
4613           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4614             ba_i[j] += aa[nextaj++];
4615           }
4616         }
4617         nextrow[k]++; nextai[k]++;
4618       }
4619     }
4620     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4621   }
4622   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4623   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4624 
4625   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4626   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4627   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4628   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4629   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4634 {
4635   PetscErrorCode      ierr;
4636   Mat                 B_mpi;
4637   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4638   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4639   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4640   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4641   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4642   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4643   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4644   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4645   MPI_Status          *status;
4646   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4647   PetscBT             lnkbt;
4648   Mat_Merge_SeqsToMPI *merge;
4649   PetscContainer      container;
4650 
4651   PetscFunctionBegin;
4652   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4653 
4654   /* make sure it is a PETSc comm */
4655   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4658 
4659   ierr = PetscNew(&merge);CHKERRQ(ierr);
4660   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4661 
4662   /* determine row ownership */
4663   /*---------------------------------------------------------*/
4664   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4669   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4670   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4671 
4672   m      = merge->rowmap->n;
4673   owners = merge->rowmap->range;
4674 
4675   /* determine the number of messages to send, their lengths */
4676   /*---------------------------------------------------------*/
4677   len_s = merge->len_s;
4678 
4679   len          = 0; /* length of buf_si[] */
4680   merge->nsend = 0;
4681   for (proc=0; proc<size; proc++) {
4682     len_si[proc] = 0;
4683     if (proc == rank) {
4684       len_s[proc] = 0;
4685     } else {
4686       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4687       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4688     }
4689     if (len_s[proc]) {
4690       merge->nsend++;
4691       nrows = 0;
4692       for (i=owners[proc]; i<owners[proc+1]; i++) {
4693         if (ai[i+1] > ai[i]) nrows++;
4694       }
4695       len_si[proc] = 2*(nrows+1);
4696       len         += len_si[proc];
4697     }
4698   }
4699 
4700   /* determine the number and length of messages to receive for ij-structure */
4701   /*-------------------------------------------------------------------------*/
4702   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4703   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4704 
4705   /* post the Irecv of j-structure */
4706   /*-------------------------------*/
4707   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4708   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4709 
4710   /* post the Isend of j-structure */
4711   /*--------------------------------*/
4712   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4713 
4714   for (proc=0, k=0; proc<size; proc++) {
4715     if (!len_s[proc]) continue;
4716     i    = owners[proc];
4717     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4718     k++;
4719   }
4720 
4721   /* receives and sends of j-structure are complete */
4722   /*------------------------------------------------*/
4723   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4724   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4725 
4726   /* send and recv i-structure */
4727   /*---------------------------*/
4728   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4729   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4730 
4731   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4732   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4733   for (proc=0,k=0; proc<size; proc++) {
4734     if (!len_s[proc]) continue;
4735     /* form outgoing message for i-structure:
4736          buf_si[0]:                 nrows to be sent
4737                [1:nrows]:           row index (global)
4738                [nrows+1:2*nrows+1]: i-structure index
4739     */
4740     /*-------------------------------------------*/
4741     nrows       = len_si[proc]/2 - 1;
4742     buf_si_i    = buf_si + nrows+1;
4743     buf_si[0]   = nrows;
4744     buf_si_i[0] = 0;
4745     nrows       = 0;
4746     for (i=owners[proc]; i<owners[proc+1]; i++) {
4747       anzi = ai[i+1] - ai[i];
4748       if (anzi) {
4749         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4750         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4751         nrows++;
4752       }
4753     }
4754     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4755     k++;
4756     buf_si += len_si[proc];
4757   }
4758 
4759   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4760   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4761 
4762   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4763   for (i=0; i<merge->nrecv; i++) {
4764     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4765   }
4766 
4767   ierr = PetscFree(len_si);CHKERRQ(ierr);
4768   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4769   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4770   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4771   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4772   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4773   ierr = PetscFree(status);CHKERRQ(ierr);
4774 
4775   /* compute a local seq matrix in each processor */
4776   /*----------------------------------------------*/
4777   /* allocate bi array and free space for accumulating nonzero column info */
4778   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4779   bi[0] = 0;
4780 
4781   /* create and initialize a linked list */
4782   nlnk = N+1;
4783   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4784 
4785   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4786   len  = ai[owners[rank+1]] - ai[owners[rank]];
4787   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4788 
4789   current_space = free_space;
4790 
4791   /* determine symbolic info for each local row */
4792   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4793 
4794   for (k=0; k<merge->nrecv; k++) {
4795     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4796     nrows       = *buf_ri_k[k];
4797     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4798     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4799   }
4800 
4801   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4802   len  = 0;
4803   for (i=0; i<m; i++) {
4804     bnzi = 0;
4805     /* add local non-zero cols of this proc's seqmat into lnk */
4806     arow  = owners[rank] + i;
4807     anzi  = ai[arow+1] - ai[arow];
4808     aj    = a->j + ai[arow];
4809     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4810     bnzi += nlnk;
4811     /* add received col data into lnk */
4812     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4813       if (i == *nextrow[k]) { /* i-th row */
4814         anzi  = *(nextai[k]+1) - *nextai[k];
4815         aj    = buf_rj[k] + *nextai[k];
4816         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4817         bnzi += nlnk;
4818         nextrow[k]++; nextai[k]++;
4819       }
4820     }
4821     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4822 
4823     /* if free space is not available, make more free space */
4824     if (current_space->local_remaining<bnzi) {
4825       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4826       nspacedouble++;
4827     }
4828     /* copy data into free space, then initialize lnk */
4829     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4830     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4831 
4832     current_space->array           += bnzi;
4833     current_space->local_used      += bnzi;
4834     current_space->local_remaining -= bnzi;
4835 
4836     bi[i+1] = bi[i] + bnzi;
4837   }
4838 
4839   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4840 
4841   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4842   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4843   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4844 
4845   /* create symbolic parallel matrix B_mpi */
4846   /*---------------------------------------*/
4847   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4848   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4849   if (n==PETSC_DECIDE) {
4850     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4851   } else {
4852     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4853   }
4854   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4855   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4856   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4857   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4858   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4859 
4860   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4861   B_mpi->assembled    = PETSC_FALSE;
4862   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4863   merge->bi           = bi;
4864   merge->bj           = bj;
4865   merge->buf_ri       = buf_ri;
4866   merge->buf_rj       = buf_rj;
4867   merge->coi          = NULL;
4868   merge->coj          = NULL;
4869   merge->owners_co    = NULL;
4870 
4871   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4872 
4873   /* attach the supporting struct to B_mpi for reuse */
4874   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4875   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4876   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4877   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4878   *mpimat = B_mpi;
4879 
4880   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4881   PetscFunctionReturn(0);
4882 }
4883 
4884 /*@C
4885       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4886                  matrices from each processor
4887 
4888     Collective
4889 
4890    Input Parameters:
4891 +    comm - the communicator the parallel matrix will live on
4892 .    seqmat - the input sequential matrix
4893 .    m - number of local rows (or PETSC_DECIDE)
4894 .    n - number of local columns (or PETSC_DECIDE)
4895 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4896 
4897    Output Parameter:
4898 .    mpimat - the parallel matrix generated
4899 
4900     Level: advanced
4901 
4902    Notes:
4903      The dimensions of the sequential matrix in each processor MUST be the same.
4904      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4905      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
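
     A typical reuse cycle (a sketch; each process contributes its own seqmat):

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve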
4906 @*/
4907 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4908 {
4909   PetscErrorCode ierr;
4910   PetscMPIInt    size;
4911 
4912   PetscFunctionBegin;
4913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4914   if (size == 1) {
4915     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4916     if (scall == MAT_INITIAL_MATRIX) {
4917       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4918     } else {
4919       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4920     }
4921     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4922     PetscFunctionReturn(0);
4923   }
4924   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925   if (scall == MAT_INITIAL_MATRIX) {
4926     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4927   }
4928   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4929   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 /*@
4934      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4935           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4936           with MatGetSize().
4937 
4938     Not Collective
4939 
4940    Input Parameters:
4941 +    A - the matrix
4942 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4943 
4944    Output Parameter:
4945 .    A_loc - the local sequential matrix generated
4946 
4947     Level: developer
4948 
4949    Notes:
4950      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4951      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4952      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4953      modify the values of the returned A_loc.
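
     A typical call sequence (a sketch):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc; after the values of A change, refresh it in place ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve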
4954 
4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4956 
4957 @*/
4958 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4959 {
4960   PetscErrorCode ierr;
4961   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4962   Mat_SeqAIJ     *mat,*a,*b;
4963   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4964   MatScalar      *aa,*ba,*cam;
4965   PetscScalar    *ca;
4966   PetscMPIInt    size;
4967   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4968   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4969   PetscBool      match;
4970 
4971   PetscFunctionBegin;
4972   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4973   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4974   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4975   if (size == 1) {
4976     if (scall == MAT_INITIAL_MATRIX) {
4977       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4978       *A_loc = mpimat->A;
4979     } else if (scall == MAT_REUSE_MATRIX) {
4980       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4981     }
4982     PetscFunctionReturn(0);
4983   }
4984 
4985   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4986   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4987   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4988   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4989   aa = a->a; ba = b->a;
4990   if (scall == MAT_INITIAL_MATRIX) {
4991     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4992     ci[0] = 0;
4993     for (i=0; i<am; i++) {
4994       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4995     }
4996     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4997     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4998     k    = 0;
4999     for (i=0; i<am; i++) {
5000       ncols_o = bi[i+1] - bi[i];
5001       ncols_d = ai[i+1] - ai[i];
5002       /* off-diagonal portion of A */
5003       for (jo=0; jo<ncols_o; jo++) {
5004         col = cmap[*bj];
5005         if (col >= cstart) break;
5006         cj[k]   = col; bj++;
5007         ca[k++] = *ba++;
5008       }
5009       /* diagonal portion of A */
5010       for (j=0; j<ncols_d; j++) {
5011         cj[k]   = cstart + *aj++;
5012         ca[k++] = *aa++;
5013       }
5014       /* off-diagonal portion of A */
5015       for (j=jo; j<ncols_o; j++) {
5016         cj[k]   = cmap[*bj++];
5017         ca[k++] = *ba++;
5018       }
5019     }
5020     /* put together the new matrix */
5021     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5022     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5023     /* Since these are PETSc arrays, change flags to free them as necessary. */
5024     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5025     mat->free_a  = PETSC_TRUE;
5026     mat->free_ij = PETSC_TRUE;
5027     mat->nonew   = 0;
5028   } else if (scall == MAT_REUSE_MATRIX) {
5029     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5030     ci = mat->i; cj = mat->j; cam = mat->a;
5031     for (i=0; i<am; i++) {
5032       /* off-diagonal portion of A */
5033       ncols_o = bi[i+1] - bi[i];
5034       for (jo=0; jo<ncols_o; jo++) {
5035         col = cmap[*bj];
5036         if (col >= cstart) break;
5037         *cam++ = *ba++; bj++;
5038       }
5039       /* diagonal portion of A */
5040       ncols_d = ai[i+1] - ai[i];
5041       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5042       /* off-diagonal portion of A */
5043       for (j=jo; j<ncols_o; j++) {
5044         *cam++ = *ba++; bj++;
5045       }
5046     }
5047   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5048   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5049   PetscFunctionReturn(0);
5050 }
5051 
5052 /*@C
5053      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and the NON-ZERO columns of its local part
5054 
5055     Not Collective
5056 
5057    Input Parameters:
5058 +    A - the matrix
5059 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5060 -    row, col - index sets of rows and columns to extract (or NULL)
5061 
5062    Output Parameter:
5063 .    A_loc - the local sequential matrix generated
5064 
5065     Level: developer
5066 
5067 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5068 
5069 @*/
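/*
   Example usage, as a minimal sketch ('A' is an assumed, already assembled
   MATMPIAIJ matrix); passing NULL for row and col selects all local rows and
   the nonzero columns of the local part of A:

     Mat A_cond;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_cond);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A_cond);CHKERRQ(ierr);
*/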
5070 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5071 {
5072   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5073   PetscErrorCode ierr;
5074   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5075   IS             isrowa,iscola;
5076   Mat            *aloc;
5077   PetscBool      match;
5078 
5079   PetscFunctionBegin;
5080   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5081   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5082   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5083   if (!row) {
5084     start = A->rmap->rstart; end = A->rmap->rend;
5085     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5086   } else {
5087     isrowa = *row;
5088   }
5089   if (!col) {
5090     start = A->cmap->rstart;
5091     cmap  = a->garray;
5092     nzA   = a->A->cmap->n;
5093     nzB   = a->B->cmap->n;
5094     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5095     ncols = 0;
5096     for (i=0; i<nzB; i++) {
5097       if (cmap[i] < start) idx[ncols++] = cmap[i];
5098       else break;
5099     }
5100     imark = i;
5101     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5102     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5103     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5104   } else {
5105     iscola = *col;
5106   }
5107   if (scall != MAT_INITIAL_MATRIX) {
5108     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5109     aloc[0] = *A_loc;
5110   }
5111   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5112   if (!col) { /* attach global id of condensed columns */
5113     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5114   }
5115   *A_loc = aloc[0];
5116   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5117   if (!row) {
5118     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5119   }
5120   if (!col) {
5121     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5122   }
5123   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5124   PetscFunctionReturn(0);
5125 }
5126 
5127 /*
5128  * Destroy a mat that may be composed with PetscSF communication objects.
5129  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5130  * */
5131 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5132 {
5133   PetscSF          sf,osf;
5134   IS               map;
5135   PetscErrorCode   ierr;
5136 
5137   PetscFunctionBegin;
5138   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5139   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5140   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5141   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5142   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5143   ierr = ISDestroy(&map);CHKERRQ(ierr);
5144   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5145   PetscFunctionReturn(0);
5146 }
5147 
5148 /*
5149  * Create a sequential AIJ matrix based on row indices; an entire row is extracted once a row index is matched.
5150  * Rows may be local or remote. The routine is designed to be memory scalable: nothing it allocates is based
5151  * on a global size.
5152  * */
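/*
 * A minimal sketch of the PetscSF broadcast pattern this routine relies on
 * (nroots, nleaves, iremote, rootdata and leafdata are placeholders; each
 * process owns nroots entries and requests the nleaves entries described by
 * iremote):
 *
 *   PetscSF sf;
 *   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
 *   ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
 *   ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
 *   ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
 *   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
 */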
5153 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5154 {
5155   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5156   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5157   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5158   PetscMPIInt              owner;
5159   PetscSFNode              *iremote,*oiremote;
5160   const PetscInt           *lrowindices;
5161   PetscErrorCode           ierr;
5162   PetscSF                  sf,osf;
5163   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5164   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5165   MPI_Comm                 comm;
5166   ISLocalToGlobalMapping   mapping;
5167 
5168   PetscFunctionBegin;
5169   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5170   /* plocalsize is the number of roots
5171    * nrows is the number of leaves
5172    * */
5173   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5174   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5175   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5176   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5177   for (i=0;i<nrows;i++) {
5178     /* Find a remote index and an owner for a row
5179      * The row could be local or remote
5180      * */
5181     owner = 0;
5182     lidx  = 0;
5183     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5184     iremote[i].index = lidx;
5185     iremote[i].rank  = owner;
5186   }
5187   /* Create an SF to communicate how many nonzero columns each row has */
5188   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5189   /* SF will figure out the number of nonzero columns for each row, and their
5190    * offsets
5191    * */
5192   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5193   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5194   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5195 
5196   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5197   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5198   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5199   roffsets[0] = 0;
5200   roffsets[1] = 0;
5201   for (i=0;i<plocalsize;i++) {
5202     /* diag */
5203     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5204     /* off diag */
5205     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5206     /* compute offsets so that we know the relative location of each row */
5207     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5208     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5209   }
5210   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5211   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5212   /* 'r' means root, and 'l' means leaf */
5213   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5214   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5215   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5216   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5217   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5218   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5219   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5220   dntotalcols = 0;
5221   ontotalcols = 0;
5222   ncol = 0;
5223   for (i=0;i<nrows;i++) {
5224     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5225     ncol = PetscMax(pnnz[i],ncol);
5226     /* diag */
5227     dntotalcols += nlcols[i*2+0];
5228     /* off diag */
5229     ontotalcols += nlcols[i*2+1];
5230   }
5231   /* We do not need to figure out the exact number of columns
5232    * since all the calculations are done by going through the raw data
5233    * */
5234   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5235   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5236   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5237   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5238   /* diag */
5239   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5240   /* off diag */
5241   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5242   /* diag */
5243   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5244   /* off diag */
5245   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5246   dntotalcols = 0;
5247   ontotalcols = 0;
5248   ntotalcols  = 0;
5249   for (i=0;i<nrows;i++) {
5250     owner = 0;
5251     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5252     /* Set iremote for diag matrix */
5253     for (j=0;j<nlcols[i*2+0];j++) {
5254       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5255       iremote[dntotalcols].rank    = owner;
5256       /* P_oth is SeqAIJ, so ilocal needs to point into one contiguous chunk of memory */
5257       ilocal[dntotalcols++]        = ntotalcols++;
5258     }
5259     /* off diag */
5260     for (j=0;j<nlcols[i*2+1];j++) {
5261       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5262       oiremote[ontotalcols].rank    = owner;
5263       oilocal[ontotalcols++]        = ntotalcols++;
5264     }
5265   }
5266   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5267   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5268   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5269   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5270   /* P serves as the roots and P_oth as the leaves
5271    * Diag matrix
5272    * */
5273   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5274   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5275   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5276 
5277   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5278   /* Off diag */
5279   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5280   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5281   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5282   /* We operate on the matrix's internal data to save memory */
5283   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5284   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5285   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5286   /* Convert to global indices for diag matrix */
5287   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5288   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5289   /* We want P_oth to store global indices */
5290   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5291   /* Use memory scalable approach */
5292   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5293   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5294   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5295   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5296   /* Convert back to local indices */
5297   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5298   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5299   nout = 0;
5300   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5301   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5302   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5303   /* Exchange values */
5304   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5305   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5306   /* Stop PETSc from shrinking memory */
5307   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5308   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5309   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5310   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5311   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5312   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5313   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5314   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5315   PetscFunctionReturn(0);
5316 }
5317 
5318 /*
5319  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5320  * This supports MPIAIJ and MAIJ
5321  * */
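/*
 * Illustrative example (not from any particular run): with dof = 2 and
 * a->garray = {2,3,6,7}, the keys garray[i]/dof below are {1,1,3,3}; the hash
 * map then holds the two unique keys {1,3}, and mapping[] = {0,0,1,1} records
 * the condensed row that each off-diagonal column of A maps to.
 */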
5322 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5323 {
5324   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5325   Mat_SeqAIJ            *p_oth;
5326   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5327   IS                    rows,map;
5328   PetscHMapI            hamp;
5329   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5330   MPI_Comm              comm;
5331   PetscSF               sf,osf;
5332   PetscBool             has;
5333   PetscErrorCode        ierr;
5334 
5335   PetscFunctionBegin;
5336   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5337   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5338   /* On first use, create an index set of the nonzero columns of the off-diagonal part of A,
5339    *  and then create a submatrix (which is often an overlapping matrix)
5340    * */
5341   if (reuse==MAT_INITIAL_MATRIX) {
5342     /* Use a hash table to figure out unique keys */
5343     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5344     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5345     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5346     count = 0;
5347     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5348     for (i=0;i<a->B->cmap->n;i++) {
5349       key  = a->garray[i]/dof;
5350       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5351       if (!has) {
5352         mapping[i] = count;
5353         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5354       } else {
5355         /* The current 'i' maps to the same key as the previous one */
5356         mapping[i] = count-1;
5357       }
5358     }
5359     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5360     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5361     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5362     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5363     off = 0;
5364     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5365     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5366     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5367     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5368     /* In case the matrix was already created but the user wants to recreate it */
5369     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5370     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5371     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5372     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5373   } else if (reuse==MAT_REUSE_MATRIX) {
5374     /* If the matrix was already created, we simply update its values using the SF objects
5375      * that were attached to the matrix earlier.
5376      *  */
5377     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5378     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5379     if (!sf || !osf) {
5380       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5381     }
5382     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5383     /* Update values in place */
5384     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5385     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5386     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5387     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5388   } else {
5389     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5390   }
5391   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5392   PetscFunctionReturn(0);
5393 }
5394 
5395 /*@C
5396     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5397 
5398     Collective on Mat
5399 
5400    Input Parameters:
5401 +    A,B - the matrices in mpiaij format
5402 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5403 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5404 
5405    Output Parameters:
5406 +    rowb, colb - index sets of rows and columns of B to extract
5407 -    B_seq - the sequential matrix generated
5408 
5409     Level: developer
5410 
5411 @*/
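/*
   Example usage, as a minimal sketch (A and B are assumed, already assembled
   MATMPIAIJ matrices with compatible layouts):

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);

   The second call reuses the saved index sets to refresh Bseq after the
   values of B have changed.
*/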
5412 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5413 {
5414   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5415   PetscErrorCode ierr;
5416   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5417   IS             isrowb,iscolb;
5418   Mat            *bseq=NULL;
5419 
5420   PetscFunctionBegin;
5421   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5422     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5423   }
5424   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5425 
5426   if (scall == MAT_INITIAL_MATRIX) {
5427     start = A->cmap->rstart;
5428     cmap  = a->garray;
5429     nzA   = a->A->cmap->n;
5430     nzB   = a->B->cmap->n;
5431     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5432     ncols = 0;
5433     for (i=0; i<nzB; i++) {  /* row < local row index */
5434       if (cmap[i] < start) idx[ncols++] = cmap[i];
5435       else break;
5436     }
5437     imark = i;
5438     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5439     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5440     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5441     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5442   } else {
5443     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5444     isrowb  = *rowb; iscolb = *colb;
5445     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5446     bseq[0] = *B_seq;
5447   }
5448   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5449   *B_seq = bseq[0];
5450   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5451   if (!rowb) {
5452     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5453   } else {
5454     *rowb = isrowb;
5455   }
5456   if (!colb) {
5457     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5458   } else {
5459     *colb = iscolb;
5460   }
5461   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5462   PetscFunctionReturn(0);
5463 }
5464 
5465 /*
5466     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5467     of the OFF-DIAGONAL portion of the local A
5468 
5469     Collective on Mat
5470 
5471    Input Parameters:
5472 +    A,B - the matrices in mpiaij format
5473 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5474 
5475    Output Parameters:
5476 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5477 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5478 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5479 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5480 
5481     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5482      for this matrix. This is not desirable.
5483 
5484     Level: developer
5485 
5486 */
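/*
 * The routine below exchanges the i-, j- and a-arrays in three rounds, each
 * following the same nonblocking MPI pattern. A minimal sketch of one round,
 * with hypothetical buffers, counts and displacements:
 *
 *   for (i=0; i<nrecvs; i++) {
 *     ierr = MPI_Irecv(recvbuf+rdispls[i],rcounts[i],MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
 *   }
 *   for (i=0; i<nsends; i++) {
 *     ierr = MPI_Isend(sendbuf+sdispls[i],scounts[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
 *   }
 *   ierr = MPI_Waitall(nrecvs,rwaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
 *   ierr = MPI_Waitall(nsends,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
 */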
5487 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5488 {
5489   PetscErrorCode         ierr;
5490   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5491   Mat_SeqAIJ             *b_oth;
5492   VecScatter             ctx;
5493   MPI_Comm               comm;
5494   const PetscMPIInt      *rprocs,*sprocs;
5495   const PetscInt         *srow,*rstarts,*sstarts;
5496   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5497   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5498   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5499   MPI_Request            *rwaits = NULL,*swaits = NULL;
5500   MPI_Status             rstatus;
5501   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5502 
5503   PetscFunctionBegin;
5504   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5506 
5507   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5508     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5509   }
5510   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5511   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5512 
5513   if (size == 1) {
5514     if (startsj_s) *startsj_s = NULL;
5515     if (bufa_ptr)  *bufa_ptr  = NULL;
5516     *B_oth    = NULL;
5517     PetscFunctionReturn(0);
5518   }
5519 
5520   ctx = a->Mvctx;
5521   tag = ((PetscObject)ctx)->tag;
5522 
5523   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5524   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5525   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5526   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5527   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5528   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5529   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5530 
5531   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5532   if (scall == MAT_INITIAL_MATRIX) {
5533     /* i-array */
5534     /*---------*/
5535     /*  post receives */
5536     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5537     for (i=0; i<nrecvs; i++) {
5538       rowlen = rvalues + rstarts[i]*rbs;
5539       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5540       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5541     }
5542 
5543     /* pack the outgoing message */
5544     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5545 
5546     sstartsj[0] = 0;
5547     rstartsj[0] = 0;
5548     len         = 0; /* total length of j or a array to be sent */
5549     if (nsends) {
5550       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5551       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5552     }
5553     for (i=0; i<nsends; i++) {
5554       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5555       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5556       for (j=0; j<nrows; j++) {
5557         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5558         for (l=0; l<sbs; l++) {
5559           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5560 
5561           rowlen[j*sbs+l] = ncols;
5562 
5563           len += ncols;
5564           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5565         }
5566         k++;
5567       }
5568       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5569 
5570       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5571     }
5572     /* recvs and sends of i-array are completed */
5573     i = nrecvs;
5574     while (i--) {
5575       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5576     }
5577     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5578     ierr = PetscFree(svalues);CHKERRQ(ierr);
5579 
5580     /* allocate buffers for sending j and a arrays */
5581     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5582     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5583 
5584     /* create i-array of B_oth */
5585     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5586 
5587     b_othi[0] = 0;
5588     len       = 0; /* total length of j or a array to be received */
5589     k         = 0;
5590     for (i=0; i<nrecvs; i++) {
5591       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5592       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5593       for (j=0; j<nrows; j++) {
5594         b_othi[k+1] = b_othi[k] + rowlen[j];
5595         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5596         k++;
5597       }
5598       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5599     }
5600     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5601 
5602     /* allocate space for the j and a arrays of B_oth */
5603     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5604     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5605 
5606     /* j-array */
5607     /*---------*/
5608     /*  post receives of j-array */
5609     for (i=0; i<nrecvs; i++) {
5610       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5611       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5612     }
5613 
5614     /* pack the outgoing message j-array */
5615     if (nsends) k = sstarts[0];
5616     for (i=0; i<nsends; i++) {
5617       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5618       bufJ  = bufj+sstartsj[i];
5619       for (j=0; j<nrows; j++) {
5620         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5621         for (ll=0; ll<sbs; ll++) {
5622           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5623           for (l=0; l<ncols; l++) {
5624             *bufJ++ = cols[l];
5625           }
5626           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5627         }
5628       }
5629       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5630     }
5631 
5632     /* recvs and sends of j-array are completed */
5633     i = nrecvs;
5634     while (i--) {
5635       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5636     }
5637     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5638   } else if (scall == MAT_REUSE_MATRIX) {
5639     sstartsj = *startsj_s;
5640     rstartsj = *startsj_r;
5641     bufa     = *bufa_ptr;
5642     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5643     b_otha   = b_oth->a;
5644   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
5645 
5646   /* a-array */
5647   /*---------*/
5648   /*  post receives of a-array */
5649   for (i=0; i<nrecvs; i++) {
5650     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5651     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5652   }
5653 
5654   /* pack the outgoing message a-array */
5655   if (nsends) k = sstarts[0];
5656   for (i=0; i<nsends; i++) {
5657     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5658     bufA  = bufa+sstartsj[i];
5659     for (j=0; j<nrows; j++) {
5660       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5661       for (ll=0; ll<sbs; ll++) {
5662         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5663         for (l=0; l<ncols; l++) {
5664           *bufA++ = vals[l];
5665         }
5666         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5667       }
5668     }
5669     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5670   }
5671   /* recvs and sends of a-array are completed */
5672   i = nrecvs;
5673   while (i--) {
5674     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5675   }
5676   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5677   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5678 
5679   if (scall == MAT_INITIAL_MATRIX) {
5680     /* put together the new matrix */
5681     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5682 
5683     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5684     /* Since these are PETSc arrays, change flags to free them as necessary. */
5685     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5686     b_oth->free_a  = PETSC_TRUE;
5687     b_oth->free_ij = PETSC_TRUE;
5688     b_oth->nonew   = 0;
5689 
5690     ierr = PetscFree(bufj);CHKERRQ(ierr);
5691     if (!startsj_s || !bufa_ptr) {
5692       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5693       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5694     } else {
5695       *startsj_s = sstartsj;
5696       *startsj_r = rstartsj;
5697       *bufa_ptr  = bufa;
5698     }
5699   }
5700 
5701   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5702   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5703   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5704   PetscFunctionReturn(0);
5705 }
5706 
5707 /*@C
5708   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5709 
5710   Not Collective
5711 
5712   Input Parameters:
5713   Input Parameter:
5714 . A - The matrix in mpiaij format
5715 
5716   Output Parameters:
5717 . colmap - A map from global column index to local index into lvec
5718 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5719 
5720   Level: developer
5721 
5722 @*/
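/*
   Example usage, as a minimal sketch ('A' is an assumed, already assembled
   MATMPIAIJ matrix, and PETSC_USE_CTABLE is assumed to be undefined so that
   colmap is a plain PetscInt array); the returned objects are owned by the
   matrix and must not be destroyed by the caller:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/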
5723 #if defined(PETSC_USE_CTABLE)
5724 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5725 #else
5726 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5727 #endif
5728 {
5729   Mat_MPIAIJ *a;
5730 
5731   PetscFunctionBegin;
5732   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5733   PetscValidPointer(lvec, 2);
5734   PetscValidPointer(colmap, 3);
5735   PetscValidPointer(multScatter, 4);
5736   a = (Mat_MPIAIJ*) A->data;
5737   if (lvec) *lvec = a->lvec;
5738   if (colmap) *colmap = a->colmap;
5739   if (multScatter) *multScatter = a->Mvctx;
5740   PetscFunctionReturn(0);
5741 }
5742 
5743 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5744 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5745 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5746 #if defined(PETSC_HAVE_MKL_SPARSE)
5747 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5748 #endif
5749 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5750 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5751 #if defined(PETSC_HAVE_ELEMENTAL)
5752 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5753 #endif
5754 #if defined(PETSC_HAVE_HYPRE)
5755 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5756 #endif
5757 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5758 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5759 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5760 
5761 /*
5762     Computes C = A*B as C = (B'*A')', since multiplying a dense A by a sparse B directly is untenable
5763 
5764                n                       p                          p
5765         (              )       (              )         (                  )
5766       m (      A       )  *  n (       B      )   =   m (         C        )
5767         (              )       (              )         (                  )
5768 
5769 */
5770 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5771 {
5772   PetscErrorCode ierr;
5773   Mat            At,Bt,Ct;
5774 
5775   PetscFunctionBegin;
5776   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5777   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5778   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5779   ierr = MatDestroy(&At);CHKERRQ(ierr);
5780   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5781   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5782   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5783   PetscFunctionReturn(0);
5784 }
5785 
5786 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5787 {
5788   PetscErrorCode ierr;
5789   PetscInt       m=A->rmap->n,n=B->cmap->n;
5790 
5791   PetscFunctionBegin;
5792   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5793   ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5794   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5795   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5796   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
5797   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5798   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5799 
5800   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5801   PetscFunctionReturn(0);
5802 }
5803 
5804 /* ----------------------------------------------------------------*/
5805 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5806 {
5807   Mat_Product *product = C->product;
5808   Mat         A = product->A,B=product->B;
5809 
5810   PetscFunctionBegin;
5811   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5812     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5813 
5814   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5815   C->ops->productsymbolic = MatProductSymbolic_AB;
5816   PetscFunctionReturn(0);
5817 }
5818 
5819 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5820 {
5821   PetscErrorCode ierr;
5822   Mat_Product    *product = C->product;
5823 
5824   PetscFunctionBegin;
5825   if (product->type == MATPRODUCT_AB) {
5826     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5827   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
5828   PetscFunctionReturn(0);
5829 }
5830 /* ----------------------------------------------------------------*/
5831 
5832 /*MC
5833    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5834 
5835    Options Database Keys:
5836 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5837 
5838    Level: beginner
5839 
5840    Notes:
5841     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5842     in this case the values associated with the rows and columns one passes in are set to zero
5843     in the matrix
5844 
5845     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5846     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
5847 
5848 .seealso: MatCreateAIJ()
5849 M*/
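/*
   A minimal creation sketch for this type (M and N are placeholder global
   sizes; the preallocation numbers are illustrative only):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/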
5850 
5851 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5852 {
5853   Mat_MPIAIJ     *b;
5854   PetscErrorCode ierr;
5855   PetscMPIInt    size;
5856 
5857   PetscFunctionBegin;
5858   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5859 
5860   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5861   B->data       = (void*)b;
5862   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5863   B->assembled  = PETSC_FALSE;
5864   B->insertmode = NOT_SET_VALUES;
5865   b->size       = size;
5866 
5867   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5868 
5869   /* build cache for off array entries formed */
5870   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5871 
5872   b->donotstash  = PETSC_FALSE;
5873   b->colmap      = 0;
5874   b->garray      = 0;
5875   b->roworiented = PETSC_TRUE;
5876 
5877   /* stuff used for matrix vector multiply */
5878   b->lvec  = NULL;
5879   b->Mvctx = NULL;
5880 
5881   /* stuff for MatGetRow() */
5882   b->rowindices   = 0;
5883   b->rowvalues    = 0;
5884   b->getrowactive = PETSC_FALSE;
5885 
5886   /* flexible pointer used in CUSP/CUSPARSE classes */
5887   b->spptr = NULL;
5888 
5889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5890   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5891   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5894   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5896   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5897   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5899 #if defined(PETSC_HAVE_MKL_SPARSE)
5900   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5901 #endif
5902   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5903   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5904   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5905 #if defined(PETSC_HAVE_ELEMENTAL)
5906   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5907 #endif
5908   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5909   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5910   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5911   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5912 #if defined(PETSC_HAVE_HYPRE)
5913   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5914   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5915 #endif
5916   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5917   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5918   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5919   PetscFunctionReturn(0);
5920 }
5921 
5922 /*@C
5923      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5924          and "off-diagonal" part of the matrix in CSR format.
5925 
5926    Collective
5927 
5928    Input Parameters:
5929 +  comm - MPI communicator
5930 .  m - number of local rows (Cannot be PETSC_DECIDE)
5931 .  n - This value should be the same as the local size used in creating the
5932        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5933        it calculated if N is given). For square matrices n is almost always m.
5934 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5935 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5936 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5937 .   j - column indices
5938 .   a - matrix values
5939 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5940 .   oj - column indices
5941 -   oa - matrix values
5942 
5943    Output Parameter:
5944 .   mat - the matrix
5945 
5946    Level: advanced
5947 
5948    Notes:
5949        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5950        must free the arrays once the matrix has been destroyed and not before.
5951 
5952        The i and j indices are 0 based
5953 
5954        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5955 
5956        This sets local rows and cannot be used to set off-processor values.
5957 
5958        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5959        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5960        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5961        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5962        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5963        communication if it is known that only local entries will be set.
5964 
5965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5966           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5967 @*/
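/*
   Example usage, as a minimal sketch for a 2 x 2 matrix [1 2; 3 4] on two
   processes, each owning one row and one column (the arrays must stay valid
   until the matrix is destroyed; oj holds global column indices):

     on rank 0:  PetscInt i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {1.0};
                 PetscInt oi[] = {0,1}, oj[] = {1};  PetscScalar oa[] = {2.0};
     on rank 1:  PetscInt i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {4.0};
                 PetscInt oi[] = {0,1}, oj[] = {0};  PetscScalar oa[] = {3.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/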
5968 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5969 {
5970   PetscErrorCode ierr;
5971   Mat_MPIAIJ     *maij;
5972 
5973   PetscFunctionBegin;
5974   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5975   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5976   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5977   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5978   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5979   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5980   maij = (Mat_MPIAIJ*) (*mat)->data;
5981 
5982   (*mat)->preallocated = PETSC_TRUE;
5983 
5984   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5985   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5986 
5987   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5988   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5989 
5990   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5991   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5992   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5993   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5994 
5995   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5996   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5997   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5998   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5999   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6000   PetscFunctionReturn(0);
6001 }
6002 
6003 /*
6004     Special version for direct calls from Fortran
6005 */
6006 #include <petsc/private/fortranimpl.h>
6007 
6008 /* Change these macros so they can be used in a void function */
6009 #undef CHKERRQ
6010 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6011 #undef SETERRQ2
6012 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6013 #undef SETERRQ3
6014 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6015 #undef SETERRQ
6016 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6017 
6018 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6019 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6020 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6021 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6022 #else
6023 #endif
6024 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6025 {
6026   Mat            mat  = *mmat;
6027   PetscInt       m    = *mm, n = *mn;
6028   InsertMode     addv = *maddv;
6029   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6030   PetscScalar    value;
6031   PetscErrorCode ierr;
6032 
6033   MatCheckPreallocated(mat,1);
6034   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6035   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6036   {
6037     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6038     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6039     PetscBool roworiented = aij->roworiented;
6040 
6041     /* Some Variables required in the macro */
6042     Mat        A                    = aij->A;
6043     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6044     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6045     MatScalar  *aa                  = a->a;
6046     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6047     Mat        B                    = aij->B;
6048     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6049     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6050     MatScalar  *ba                  = b->a;
6051     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6052      * cannot use "#if defined" inside a macro. */
6053     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6054 
6055     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6056     PetscInt  nonew = a->nonew;
6057     MatScalar *ap1,*ap2;
6058 
6059     PetscFunctionBegin;
6060     for (i=0; i<m; i++) {
6061       if (im[i] < 0) continue;
6062       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6063       if (im[i] >= rstart && im[i] < rend) {
6064         row      = im[i] - rstart;
6065         lastcol1 = -1;
6066         rp1      = aj + ai[row];
6067         ap1      = aa + ai[row];
6068         rmax1    = aimax[row];
6069         nrow1    = ailen[row];
6070         low1     = 0;
6071         high1    = nrow1;
6072         lastcol2 = -1;
6073         rp2      = bj + bi[row];
6074         ap2      = ba + bi[row];
6075         rmax2    = bimax[row];
6076         nrow2    = bilen[row];
6077         low2     = 0;
6078         high2    = nrow2;
6079 
6080         for (j=0; j<n; j++) {
6081           if (roworiented) value = v[i*n+j];
6082           else value = v[i+j*m];
6083           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6084           if (in[j] >= cstart && in[j] < cend) {
6085             col = in[j] - cstart;
6086             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6087 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6088             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6089 #endif
6090           } else if (in[j] < 0) continue;
6091           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6092             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6093             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6094           } else {
6095             if (mat->was_assembled) {
6096               if (!aij->colmap) {
6097                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6098               }
6099 #if defined(PETSC_USE_CTABLE)
6100               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6101               col--;
6102 #else
6103               col = aij->colmap[in[j]] - 1;
6104 #endif
6105               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6106                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6107                 col  =  in[j];
6108                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6109                 B        = aij->B;
6110                 b        = (Mat_SeqAIJ*)B->data;
6111                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6112                 rp2      = bj + bi[row];
6113                 ap2      = ba + bi[row];
6114                 rmax2    = bimax[row];
6115                 nrow2    = bilen[row];
6116                 low2     = 0;
6117                 high2    = nrow2;
6118                 bm       = aij->B->rmap->n;
6119                 ba       = b->a;
6120                 inserted = PETSC_FALSE;
6121               }
6122             } else col = in[j];
6123             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6124 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6125             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6126 #endif
6127           }
6128         }
6129       } else if (!aij->donotstash) {
6130         if (roworiented) {
6131           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6132         } else {
6133           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6134         }
6135       }
6136     }
6137   }
6138   PetscFunctionReturnVoid();
6139 }
6140