xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 475b8b616cdcda5163cdfb14dea2d970a8e9d902)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine number diagonal and off-diagonal counts */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine number diagonal and off-diagonal counts */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0*/
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it it is not scalable (each processor
425 has an order N integer array but is fast to acess.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure LogFlops will slow dow the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled, if so we must
857      also disassemble ourself, in order that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = NULL;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of off process part of matrix zeroing removed columns*/
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented code uses MatCreateSubMatrices instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Everyone has to call to draw the matrix since the graphics waits are
1495        synchronized across all processors that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = NULL;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1711       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = NULL; pvB = NULL;}
1874   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Sort by increasing column numbers, assuming A and B already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = NULL;
1912       if (v)   *v   = NULL;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2060   very quickly (=without using MatSetValues), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2106   }
2107   /* scale  the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may be different, hence we cannot call
2158        the MatCopy() directly on the two parts. If need be, we can provide a more
2159        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2160        then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308   PetscInt       i,*idxb = NULL,m = A->rmap->n;
2309   PetscScalar    *va,*vv;
2310   Vec            vB,vA;
2311   const PetscScalar *vb;
2312 
2313   PetscFunctionBegin;
2314   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vA);CHKERRQ(ierr);
2315   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2316 
2317   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2318   if (idx) {
2319     for (i=0; i<m; i++) {
2320       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2321     }
2322   }
2323 
2324   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vB);CHKERRQ(ierr);
2325   if (idx) {
2326     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2327   }
2328   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2329 
2330   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2331   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2332   for (i=0; i<m; i++) {
2333     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2334       vv[i] = vb[i];
2335       if (idx) idx[i] = a->garray[idxb[i]];
2336     } else {
2337       vv[i] = va[i];
2338       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idx[i] > a->garray[idxb[i]])
2339         idx[i] = a->garray[idxb[i]];
2340     }
2341   }
2342   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2343   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2344   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2345   ierr = PetscFree(idxb);CHKERRQ(ierr);
2346   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2347   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2352 {
2353   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2354   PetscErrorCode ierr;
2355   PetscInt       i,*idxb = NULL;
2356   PetscScalar    *va,*vb;
2357   Vec            vtmp;
2358 
2359   PetscFunctionBegin;
2360   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2361   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2362   if (idx) {
2363     for (i=0; i<A->cmap->n; i++) {
2364       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2365     }
2366   }
2367 
2368   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2369   if (idx) {
2370     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2371   }
2372   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2373   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2374 
2375   for (i=0; i<A->rmap->n; i++) {
2376     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2377       va[i] = vb[i];
2378       if (idx) idx[i] = a->garray[idxb[i]];
2379     }
2380   }
2381 
2382   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2383   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2384   ierr = PetscFree(idxb);CHKERRQ(ierr);
2385   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2390 {
2391   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2392   PetscInt       m = A->rmap->n,n = A->cmap->n;
2393   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2394   PetscInt       *cmap  = mat->garray;
2395   PetscInt       *diagIdx, *offdiagIdx;
2396   Vec            diagV, offdiagV;
2397   PetscScalar    *a, *diagA, *offdiagA, *ba;
2398   PetscInt       r,j,col,ncols,*bi,*bj;
2399   PetscErrorCode ierr;
2400   Mat            B = mat->B;
2401   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2402 
2403   PetscFunctionBegin;
2404   /* When a process holds entire A and other processes have no entry */
2405   if (A->cmap->N == n) {
2406     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2407     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2408     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2409     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2410     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2411     PetscFunctionReturn(0);
2412   } else if (n == 0) {
2413     if (m) {
2414       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2415       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2416       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2417     }
2418     PetscFunctionReturn(0);
2419   }
2420 
2421   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2423   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2424   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2425 
2426   /* Get offdiagIdx[] for implicit 0.0 */
2427   ba = b->a;
2428   bi = b->i;
2429   bj = b->j;
2430   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2431   for (r = 0; r < m; r++) {
2432     ncols = bi[r+1] - bi[r];
2433     if (ncols == A->cmap->N - n) { /* Brow is dense */
2434       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2435     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2436       offdiagA[r] = 0.0;
2437 
2438       /* Find first hole in the cmap */
2439       for (j=0; j<ncols; j++) {
2440         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2441         if (col > j && j < cstart) {
2442           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2443           break;
2444         } else if (col > j + n && j >= cstart) {
2445           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2446           break;
2447         }
2448       }
2449       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2450         /* a hole is outside compressed Bcols */
2451         if (ncols == 0) {
2452           if (cstart) {
2453             offdiagIdx[r] = 0;
2454           } else offdiagIdx[r] = cend;
2455         } else { /* ncols > 0 */
2456           offdiagIdx[r] = cmap[ncols-1] + 1;
2457           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2458         }
2459       }
2460     }
2461 
2462     for (j=0; j<ncols; j++) {
2463       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2464       ba++; bj++;
2465     }
2466   }
2467 
2468   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2469   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2470   for (r = 0; r < m; ++r) {
2471     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2472       a[r]   = diagA[r];
2473       if (idx) idx[r] = cstart + diagIdx[r];
2474     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2475       a[r] = diagA[r];
2476       if (idx) {
2477         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2478           idx[r] = cstart + diagIdx[r];
2479         } else idx[r] = offdiagIdx[r];
2480       }
2481     } else {
2482       a[r]   = offdiagA[r];
2483       if (idx) idx[r] = offdiagIdx[r];
2484     }
2485   }
2486   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2487   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2488   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2489   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2490   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2491   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2496 {
2497   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2498   PetscInt       m = A->rmap->n,n = A->cmap->n;
2499   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2500   PetscInt       *cmap  = mat->garray;
2501   PetscInt       *diagIdx, *offdiagIdx;
2502   Vec            diagV, offdiagV;
2503   PetscScalar    *a, *diagA, *offdiagA, *ba;
2504   PetscInt       r,j,col,ncols,*bi,*bj;
2505   PetscErrorCode ierr;
2506   Mat            B = mat->B;
2507   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2508 
2509   PetscFunctionBegin;
2510   /* When a process holds entire A and other processes have no entry */
2511   if (A->cmap->N == n) {
2512     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2513     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2514     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2515     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2516     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2517     PetscFunctionReturn(0);
2518   } else if (n == 0) {
2519     if (m) {
2520       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2521       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2522       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2523     }
2524     PetscFunctionReturn(0);
2525   }
2526 
2527   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2528   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2529   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2530   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2531 
2532   /* Get offdiagIdx[] for implicit 0.0 */
2533   ba = b->a;
2534   bi = b->i;
2535   bj = b->j;
2536   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2537   for (r = 0; r < m; r++) {
2538     ncols = bi[r+1] - bi[r];
2539     if (ncols == A->cmap->N - n) { /* Brow is dense */
2540       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2541     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2542       offdiagA[r] = 0.0;
2543 
2544       /* Find first hole in the cmap */
2545       for (j=0; j<ncols; j++) {
2546         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2547         if (col > j && j < cstart) {
2548           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2549           break;
2550         } else if (col > j + n && j >= cstart) {
2551           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2552           break;
2553         }
2554       }
2555       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2556         /* a hole is outside compressed Bcols */
2557         if (ncols == 0) {
2558           if (cstart) {
2559             offdiagIdx[r] = 0;
2560           } else offdiagIdx[r] = cend;
2561         } else { /* ncols > 0 */
2562           offdiagIdx[r] = cmap[ncols-1] + 1;
2563           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2564         }
2565       }
2566     }
2567 
2568     for (j=0; j<ncols; j++) {
2569       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2570       ba++; bj++;
2571     }
2572   }
2573 
2574   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2575   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2576   for (r = 0; r < m; ++r) {
2577     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2578       a[r] = diagA[r];
2579       if (idx) idx[r] = cstart + diagIdx[r];
2580     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2581       a[r] = diagA[r];
2582       if (idx) {
2583         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2584           idx[r] = cstart + diagIdx[r];
2585         } else idx[r] = offdiagIdx[r];
2586       }
2587     } else {
2588       a[r] = offdiagA[r];
2589       if (idx) idx[r] = offdiagIdx[r];
2590     }
2591   }
2592   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2593   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2594   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2595   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2596   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2597   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2602 {
2603   PetscErrorCode ierr;
2604   Mat            *dummy;
2605 
2606   PetscFunctionBegin;
2607   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2608   *newmat = *dummy;
2609   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2614 {
2615   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2616   PetscErrorCode ierr;
2617 
2618   PetscFunctionBegin;
2619   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2620   A->factorerrortype = a->A->factorerrortype;
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2625 {
2626   PetscErrorCode ierr;
2627   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2628 
2629   PetscFunctionBegin;
2630   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2631   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2632   if (x->assembled) {
2633     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2634   } else {
2635     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2636   }
2637   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2638   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2643 {
2644   PetscFunctionBegin;
2645   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2646   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 /*@
2651    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2652 
2653    Collective on Mat
2654 
2655    Input Parameters:
2656 +    A - the matrix
2657 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2658 
2659  Level: advanced
2660 
2661 @*/
2662 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2663 {
2664   PetscErrorCode       ierr;
2665 
2666   PetscFunctionBegin;
2667   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2672 {
2673   PetscErrorCode       ierr;
2674   PetscBool            sc = PETSC_FALSE,flg;
2675 
2676   PetscFunctionBegin;
2677   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2678   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2679   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2680   if (flg) {
2681     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2682   }
2683   ierr = PetscOptionsTail();CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2688 {
2689   PetscErrorCode ierr;
2690   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2691   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2692 
2693   PetscFunctionBegin;
2694   if (!Y->preallocated) {
2695     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2696   } else if (!aij->nz) {
2697     PetscInt nonew = aij->nonew;
2698     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2699     aij->nonew = nonew;
2700   }
2701   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2706 {
2707   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2712   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2713   if (d) {
2714     PetscInt rstart;
2715     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2716     *d += rstart;
2717 
2718   }
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2723 {
2724   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 /* -------------------------------------------------------------------*/
2733 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2734                                        MatGetRow_MPIAIJ,
2735                                        MatRestoreRow_MPIAIJ,
2736                                        MatMult_MPIAIJ,
2737                                 /* 4*/ MatMultAdd_MPIAIJ,
2738                                        MatMultTranspose_MPIAIJ,
2739                                        MatMultTransposeAdd_MPIAIJ,
2740                                        NULL,
2741                                        NULL,
2742                                        NULL,
2743                                 /*10*/ NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        MatSOR_MPIAIJ,
2747                                        MatTranspose_MPIAIJ,
2748                                 /*15*/ MatGetInfo_MPIAIJ,
2749                                        MatEqual_MPIAIJ,
2750                                        MatGetDiagonal_MPIAIJ,
2751                                        MatDiagonalScale_MPIAIJ,
2752                                        MatNorm_MPIAIJ,
2753                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2754                                        MatAssemblyEnd_MPIAIJ,
2755                                        MatSetOption_MPIAIJ,
2756                                        MatZeroEntries_MPIAIJ,
2757                                 /*24*/ MatZeroRows_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                 /*29*/ MatSetUp_MPIAIJ,
2763                                        NULL,
2764                                        NULL,
2765                                        MatGetDiagonalBlock_MPIAIJ,
2766                                        NULL,
2767                                 /*34*/ MatDuplicate_MPIAIJ,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                 /*39*/ MatAXPY_MPIAIJ,
2773                                        MatCreateSubMatrices_MPIAIJ,
2774                                        MatIncreaseOverlap_MPIAIJ,
2775                                        MatGetValues_MPIAIJ,
2776                                        MatCopy_MPIAIJ,
2777                                 /*44*/ MatGetRowMax_MPIAIJ,
2778                                        MatScale_MPIAIJ,
2779                                        MatShift_MPIAIJ,
2780                                        MatDiagonalSet_MPIAIJ,
2781                                        MatZeroRowsColumns_MPIAIJ,
2782                                 /*49*/ MatSetRandom_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2788                                        NULL,
2789                                        MatSetUnfactored_MPIAIJ,
2790                                        MatPermute_MPIAIJ,
2791                                        NULL,
2792                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2793                                        MatDestroy_MPIAIJ,
2794                                        MatView_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                 /*64*/ NULL,
2798                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2803                                        MatGetRowMinAbs_MPIAIJ,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        NULL,
2808                                 /*75*/ MatFDColoringApply_AIJ,
2809                                        MatSetFromOptions_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        MatFindZeroDiagonals_MPIAIJ,
2813                                 /*80*/ NULL,
2814                                        NULL,
2815                                        NULL,
2816                                 /*83*/ MatLoad_MPIAIJ,
2817                                        MatIsSymmetric_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                 /*89*/ NULL,
2823                                        NULL,
2824                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2825                                        NULL,
2826                                        NULL,
2827                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2828                                        NULL,
2829                                        NULL,
2830                                        NULL,
2831                                        MatBindToCPU_MPIAIJ,
2832                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        MatConjugate_MPIAIJ,
2836                                        NULL,
2837                                 /*104*/MatSetValuesRow_MPIAIJ,
2838                                        MatRealPart_MPIAIJ,
2839                                        MatImaginaryPart_MPIAIJ,
2840                                        NULL,
2841                                        NULL,
2842                                 /*109*/NULL,
2843                                        NULL,
2844                                        MatGetRowMin_MPIAIJ,
2845                                        NULL,
2846                                        MatMissingDiagonal_MPIAIJ,
2847                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2848                                        NULL,
2849                                        MatGetGhosts_MPIAIJ,
2850                                        NULL,
2851                                        NULL,
2852                                 /*119*/NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatGetMultiProcBlock_MPIAIJ,
2857                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2858                                        MatGetColumnNorms_MPIAIJ,
2859                                        MatInvertBlockDiagonal_MPIAIJ,
2860                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2861                                        MatCreateSubMatricesMPI_MPIAIJ,
2862                                 /*129*/NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2866                                        NULL,
2867                                 /*134*/NULL,
2868                                        NULL,
2869                                        NULL,
2870                                        NULL,
2871                                        NULL,
2872                                 /*139*/MatSetBlockSizes_MPIAIJ,
2873                                        NULL,
2874                                        NULL,
2875                                        MatFDColoringSetUp_MPIXAIJ,
2876                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2877                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2878                                 /*145*/NULL,
2879                                        NULL,
2880                                        NULL
2881 };
2882 
2883 /* ----------------------------------------------------------------------------------------*/
2884 
2885 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2886 {
2887   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2888   PetscErrorCode ierr;
2889 
2890   PetscFunctionBegin;
2891   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2892   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2893   PetscFunctionReturn(0);
2894 }
2895 
2896 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2897 {
2898   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2899   PetscErrorCode ierr;
2900 
2901   PetscFunctionBegin;
2902   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2903   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2908 {
2909   Mat_MPIAIJ     *b;
2910   PetscErrorCode ierr;
2911   PetscMPIInt    size;
2912 
2913   PetscFunctionBegin;
2914   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2915   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2916   b = (Mat_MPIAIJ*)B->data;
2917 
2918 #if defined(PETSC_USE_CTABLE)
2919   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2920 #else
2921   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2922 #endif
2923   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2924   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2925   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2926 
2927   /* Because the B will have been resized we simply destroy it and create a new one each time */
2928   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2929   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2930   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2931   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2932   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2933   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2934   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2935 
2936   if (!B->preallocated) {
2937     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2938     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2939     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2940     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2941     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2942   }
2943 
2944   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2945   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2946   B->preallocated  = PETSC_TRUE;
2947   B->was_assembled = PETSC_FALSE;
2948   B->assembled     = PETSC_FALSE;
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2953 {
2954   Mat_MPIAIJ     *b;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2959   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2961   b = (Mat_MPIAIJ*)B->data;
2962 
2963 #if defined(PETSC_USE_CTABLE)
2964   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2965 #else
2966   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2967 #endif
2968   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2969   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2970   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2971 
2972   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2973   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2974   B->preallocated  = PETSC_TRUE;
2975   B->was_assembled = PETSC_FALSE;
2976   B->assembled = PETSC_FALSE;
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2981 {
2982   Mat            mat;
2983   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   *newmat = NULL;
2988   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2989   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2990   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2991   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2992   a       = (Mat_MPIAIJ*)mat->data;
2993 
2994   mat->factortype   = matin->factortype;
2995   mat->assembled    = matin->assembled;
2996   mat->insertmode   = NOT_SET_VALUES;
2997   mat->preallocated = matin->preallocated;
2998 
2999   a->size         = oldmat->size;
3000   a->rank         = oldmat->rank;
3001   a->donotstash   = oldmat->donotstash;
3002   a->roworiented  = oldmat->roworiented;
3003   a->rowindices   = NULL;
3004   a->rowvalues    = NULL;
3005   a->getrowactive = PETSC_FALSE;
3006 
3007   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3008   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3009 
3010   if (oldmat->colmap) {
3011 #if defined(PETSC_USE_CTABLE)
3012     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3013 #else
3014     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3015     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3016     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3017 #endif
3018   } else a->colmap = NULL;
3019   if (oldmat->garray) {
3020     PetscInt len;
3021     len  = oldmat->B->cmap->n;
3022     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3023     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3024     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3025   } else a->garray = NULL;
3026 
3027   /* It may happen MatDuplicate is called with a non-assembled matrix
3028      In fact, MatDuplicate only requires the matrix to be preallocated
3029      This may happen inside a DMCreateMatrix_Shell */
3030   if (oldmat->lvec) {
3031     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3032     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3033   }
3034   if (oldmat->Mvctx) {
3035     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3036     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3037   }
3038   if (oldmat->Mvctx_mpi1) {
3039     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3040     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3041   }
3042 
3043   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3044   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3045   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3046   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3047   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3048   *newmat = mat;
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3053 {
3054   PetscBool      isbinary, ishdf5;
3055   PetscErrorCode ierr;
3056 
3057   PetscFunctionBegin;
3058   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3059   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3060   /* force binary viewer to load .info file if it has not yet done so */
3061   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3062   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3063   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3064   if (isbinary) {
3065     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3066   } else if (ishdf5) {
3067 #if defined(PETSC_HAVE_HDF5)
3068     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3069 #else
3070     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3071 #endif
3072   } else {
3073     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3074   }
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3079 {
3080   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3081   PetscInt       *rowidxs,*colidxs;
3082   PetscScalar    *matvals;
3083   PetscErrorCode ierr;
3084 
3085   PetscFunctionBegin;
3086   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3087 
3088   /* read in matrix header */
3089   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3090   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3091   M  = header[1]; N = header[2]; nz = header[3];
3092   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3093   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3094   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3095 
3096   /* set block sizes from the viewer's .info file */
3097   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3098   /* set global sizes if not set already */
3099   if (mat->rmap->N < 0) mat->rmap->N = M;
3100   if (mat->cmap->N < 0) mat->cmap->N = N;
3101   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3102   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3103 
3104   /* check if the matrix sizes are correct */
3105   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3106   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3107 
3108   /* read in row lengths and build row indices */
3109   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3110   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3111   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3112   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3113   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3114   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3115   /* read in column indices and matrix values */
3116   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3117   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3118   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3119   /* store matrix indices and values */
3120   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3121   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3122   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3123   PetscFunctionReturn(0);
3124 }
3125 
3126 /* Not scalable because of ISAllGather() unless getting all columns. */
3127 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3128 {
3129   PetscErrorCode ierr;
3130   IS             iscol_local;
3131   PetscBool      isstride;
3132   PetscMPIInt    lisstride=0,gisstride;
3133 
3134   PetscFunctionBegin;
3135   /* check if we are grabbing all columns*/
3136   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3137 
3138   if (isstride) {
3139     PetscInt  start,len,mstart,mlen;
3140     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3141     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3142     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3143     if (mstart == start && mlen-mstart == len) lisstride = 1;
3144   }
3145 
3146   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3147   if (gisstride) {
3148     PetscInt N;
3149     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3150     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3151     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3152     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3153   } else {
3154     PetscInt cbs;
3155     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3156     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3157     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3158   }
3159 
3160   *isseq = iscol_local;
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 /*
3165  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3166  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3167 
3168  Input Parameters:
3169    mat - matrix
3170    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3171            i.e., mat->rstart <= isrow[i] < mat->rend
3172    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3173            i.e., mat->cstart <= iscol[i] < mat->cend
3174  Output Parameter:
3175    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3176    iscol_o - sequential column index set for retrieving mat->B
3177    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3178  */
3179 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3180 {
3181   PetscErrorCode ierr;
3182   Vec            x,cmap;
3183   const PetscInt *is_idx;
3184   PetscScalar    *xarray,*cmaparray;
3185   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3186   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3187   Mat            B=a->B;
3188   Vec            lvec=a->lvec,lcmap;
3189   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3190   MPI_Comm       comm;
3191   VecScatter     Mvctx=a->Mvctx;
3192 
3193   PetscFunctionBegin;
3194   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3195   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3196 
3197   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3198   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3199   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3200   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3201   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3202 
3203   /* Get start indices */
3204   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3205   isstart -= ncols;
3206   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3207 
3208   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3209   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3210   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3211   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3212   for (i=0; i<ncols; i++) {
3213     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3214     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3215     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3216   }
3217   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3218   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3219   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3220 
3221   /* Get iscol_d */
3222   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3223   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3224   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3225 
3226   /* Get isrow_d */
3227   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3228   rstart = mat->rmap->rstart;
3229   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3230   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3231   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3232   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3233 
3234   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3235   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3236   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3237 
3238   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3239   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3240   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3241 
3242   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3243 
3244   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3245   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3246 
3247   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3248   /* off-process column indices */
3249   count = 0;
3250   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3251   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3252 
3253   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3254   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3255   for (i=0; i<Bn; i++) {
3256     if (PetscRealPart(xarray[i]) > -1.0) {
3257       idx[count]     = i;                   /* local column index in off-diagonal part B */
3258       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3259       count++;
3260     }
3261   }
3262   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3263   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3264 
3265   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3266   /* cannot ensure iscol_o has same blocksize as iscol! */
3267 
3268   ierr = PetscFree(idx);CHKERRQ(ierr);
3269   *garray = cmap1;
3270 
3271   ierr = VecDestroy(&x);CHKERRQ(ierr);
3272   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3273   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3274   PetscFunctionReturn(0);
3275 }
3276 
3277 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3278 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3279 {
3280   PetscErrorCode ierr;
3281   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3282   Mat            M = NULL;
3283   MPI_Comm       comm;
3284   IS             iscol_d,isrow_d,iscol_o;
3285   Mat            Asub = NULL,Bsub = NULL;
3286   PetscInt       n;
3287 
3288   PetscFunctionBegin;
3289   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3290 
3291   if (call == MAT_REUSE_MATRIX) {
3292     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3293     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3294     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3295 
3296     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3297     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3298 
3299     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3300     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3301 
3302     /* Update diagonal and off-diagonal portions of submat */
3303     asub = (Mat_MPIAIJ*)(*submat)->data;
3304     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3305     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3306     if (n) {
3307       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3308     }
3309     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3310     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311 
3312   } else { /* call == MAT_INITIAL_MATRIX) */
3313     const PetscInt *garray;
3314     PetscInt        BsubN;
3315 
3316     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3317     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3318 
3319     /* Create local submatrices Asub and Bsub */
3320     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3321     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3322 
3323     /* Create submatrix M */
3324     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3325 
3326     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3327     asub = (Mat_MPIAIJ*)M->data;
3328 
3329     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3330     n = asub->B->cmap->N;
3331     if (BsubN > n) {
3332       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3333       const PetscInt *idx;
3334       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3335       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3336 
3337       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3338       j = 0;
3339       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3340       for (i=0; i<n; i++) {
3341         if (j >= BsubN) break;
3342         while (subgarray[i] > garray[j]) j++;
3343 
3344         if (subgarray[i] == garray[j]) {
3345           idx_new[i] = idx[j++];
3346         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3347       }
3348       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3349 
3350       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3351       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3352 
3353     } else if (BsubN < n) {
3354       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3355     }
3356 
3357     ierr = PetscFree(garray);CHKERRQ(ierr);
3358     *submat = M;
3359 
3360     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3361     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3362     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3363 
3364     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3365     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3366 
3367     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3368     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3369   }
3370   PetscFunctionReturn(0);
3371 }
3372 
3373 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3374 {
3375   PetscErrorCode ierr;
3376   IS             iscol_local=NULL,isrow_d;
3377   PetscInt       csize;
3378   PetscInt       n,i,j,start,end;
3379   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3380   MPI_Comm       comm;
3381 
3382   PetscFunctionBegin;
3383   /* If isrow has same processor distribution as mat,
3384      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3385   if (call == MAT_REUSE_MATRIX) {
3386     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3387     if (isrow_d) {
3388       sameRowDist  = PETSC_TRUE;
3389       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3390     } else {
3391       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3392       if (iscol_local) {
3393         sameRowDist  = PETSC_TRUE;
3394         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3395       }
3396     }
3397   } else {
3398     /* Check if isrow has same processor distribution as mat */
3399     sameDist[0] = PETSC_FALSE;
3400     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3401     if (!n) {
3402       sameDist[0] = PETSC_TRUE;
3403     } else {
3404       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3405       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3406       if (i >= start && j < end) {
3407         sameDist[0] = PETSC_TRUE;
3408       }
3409     }
3410 
3411     /* Check if iscol has same processor distribution as mat */
3412     sameDist[1] = PETSC_FALSE;
3413     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3414     if (!n) {
3415       sameDist[1] = PETSC_TRUE;
3416     } else {
3417       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3418       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3419       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3420     }
3421 
3422     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3423     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3424     sameRowDist = tsameDist[0];
3425   }
3426 
3427   if (sameRowDist) {
3428     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3429       /* isrow and iscol have same processor distribution as mat */
3430       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3431       PetscFunctionReturn(0);
3432     } else { /* sameRowDist */
3433       /* isrow has same processor distribution as mat */
3434       if (call == MAT_INITIAL_MATRIX) {
3435         PetscBool sorted;
3436         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3437         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3438         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3439         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3440 
3441         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3442         if (sorted) {
3443           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3444           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3445           PetscFunctionReturn(0);
3446         }
3447       } else { /* call == MAT_REUSE_MATRIX */
3448         IS    iscol_sub;
3449         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3450         if (iscol_sub) {
3451           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3452           PetscFunctionReturn(0);
3453         }
3454       }
3455     }
3456   }
3457 
3458   /* General case: iscol -> iscol_local which has global size of iscol */
3459   if (call == MAT_REUSE_MATRIX) {
3460     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3461     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3462   } else {
3463     if (!iscol_local) {
3464       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3465     }
3466   }
3467 
3468   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3469   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3470 
3471   if (call == MAT_INITIAL_MATRIX) {
3472     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3473     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3474   }
3475   PetscFunctionReturn(0);
3476 }
3477 
3478 /*@C
3479      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3480          and "off-diagonal" part of the matrix in CSR format.
3481 
3482    Collective
3483 
3484    Input Parameters:
3485 +  comm - MPI communicator
3486 .  A - "diagonal" portion of matrix
3487 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3488 -  garray - global index of B columns
3489 
3490    Output Parameter:
3491 .   mat - the matrix, with input A as its local diagonal matrix
3492    Level: advanced
3493 
3494    Notes:
3495        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3496        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3497 
3498 .seealso: MatCreateMPIAIJWithSplitArrays()
3499 @*/
3500 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3501 {
3502   PetscErrorCode ierr;
3503   Mat_MPIAIJ     *maij;
3504   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3505   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3506   PetscScalar    *oa=b->a;
3507   Mat            Bnew;
3508   PetscInt       m,n,N;
3509 
3510   PetscFunctionBegin;
3511   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3512   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3513   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3514   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3515   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3516   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3517 
3518   /* Get global columns of mat */
3519   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3520 
3521   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3522   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3523   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3524   maij = (Mat_MPIAIJ*)(*mat)->data;
3525 
3526   (*mat)->preallocated = PETSC_TRUE;
3527 
3528   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3529   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3530 
3531   /* Set A as diagonal portion of *mat */
3532   maij->A = A;
3533 
3534   nz = oi[m];
3535   for (i=0; i<nz; i++) {
3536     col   = oj[i];
3537     oj[i] = garray[col];
3538   }
3539 
3540    /* Set Bnew as off-diagonal portion of *mat */
3541   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3542   bnew        = (Mat_SeqAIJ*)Bnew->data;
3543   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3544   maij->B     = Bnew;
3545 
3546   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3547 
3548   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3549   b->free_a       = PETSC_FALSE;
3550   b->free_ij      = PETSC_FALSE;
3551   ierr = MatDestroy(&B);CHKERRQ(ierr);
3552 
3553   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3554   bnew->free_a       = PETSC_TRUE;
3555   bnew->free_ij      = PETSC_TRUE;
3556 
3557   /* condense columns of maij->B */
3558   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3559   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3560   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3561   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3562   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3567 
3568 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3569 {
3570   PetscErrorCode ierr;
3571   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3572   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3573   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3574   Mat            M,Msub,B=a->B;
3575   MatScalar      *aa;
3576   Mat_SeqAIJ     *aij;
3577   PetscInt       *garray = a->garray,*colsub,Ncols;
3578   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3579   IS             iscol_sub,iscmap;
3580   const PetscInt *is_idx,*cmap;
3581   PetscBool      allcolumns=PETSC_FALSE;
3582   MPI_Comm       comm;
3583 
3584   PetscFunctionBegin;
3585   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3586 
3587   if (call == MAT_REUSE_MATRIX) {
3588     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3589     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3590     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3591 
3592     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3593     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3594 
3595     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3596     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3597 
3598     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3599 
3600   } else { /* call == MAT_INITIAL_MATRIX) */
3601     PetscBool flg;
3602 
3603     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3604     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3605 
3606     /* (1) iscol -> nonscalable iscol_local */
3607     /* Check for special case: each processor gets entire matrix columns */
3608     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3609     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3610     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3611     if (allcolumns) {
3612       iscol_sub = iscol_local;
3613       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3614       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3615 
3616     } else {
3617       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3618       PetscInt *idx,*cmap1,k;
3619       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3620       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3621       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3622       count = 0;
3623       k     = 0;
3624       for (i=0; i<Ncols; i++) {
3625         j = is_idx[i];
3626         if (j >= cstart && j < cend) {
3627           /* diagonal part of mat */
3628           idx[count]     = j;
3629           cmap1[count++] = i; /* column index in submat */
3630         } else if (Bn) {
3631           /* off-diagonal part of mat */
3632           if (j == garray[k]) {
3633             idx[count]     = j;
3634             cmap1[count++] = i;  /* column index in submat */
3635           } else if (j > garray[k]) {
3636             while (j > garray[k] && k < Bn-1) k++;
3637             if (j == garray[k]) {
3638               idx[count]     = j;
3639               cmap1[count++] = i; /* column index in submat */
3640             }
3641           }
3642         }
3643       }
3644       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3645 
3646       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3647       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3648       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3649 
3650       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3651     }
3652 
3653     /* (3) Create sequential Msub */
3654     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3655   }
3656 
3657   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3658   aij  = (Mat_SeqAIJ*)(Msub)->data;
3659   ii   = aij->i;
3660   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3661 
3662   /*
3663       m - number of local rows
3664       Ncols - number of columns (same on all processors)
3665       rstart - first row in new global matrix generated
3666   */
3667   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3668 
3669   if (call == MAT_INITIAL_MATRIX) {
3670     /* (4) Create parallel newmat */
3671     PetscMPIInt    rank,size;
3672     PetscInt       csize;
3673 
3674     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3675     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3676 
3677     /*
3678         Determine the number of non-zeros in the diagonal and off-diagonal
3679         portions of the matrix in order to do correct preallocation
3680     */
3681 
3682     /* first get start and end of "diagonal" columns */
3683     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3684     if (csize == PETSC_DECIDE) {
3685       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3686       if (mglobal == Ncols) { /* square matrix */
3687         nlocal = m;
3688       } else {
3689         nlocal = Ncols/size + ((Ncols % size) > rank);
3690       }
3691     } else {
3692       nlocal = csize;
3693     }
3694     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3695     rstart = rend - nlocal;
3696     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3697 
3698     /* next, compute all the lengths */
3699     jj    = aij->j;
3700     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3701     olens = dlens + m;
3702     for (i=0; i<m; i++) {
3703       jend = ii[i+1] - ii[i];
3704       olen = 0;
3705       dlen = 0;
3706       for (j=0; j<jend; j++) {
3707         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3708         else dlen++;
3709         jj++;
3710       }
3711       olens[i] = olen;
3712       dlens[i] = dlen;
3713     }
3714 
3715     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3716     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3717 
3718     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3719     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3720     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3721     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3722     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3723     ierr = PetscFree(dlens);CHKERRQ(ierr);
3724 
3725   } else { /* call == MAT_REUSE_MATRIX */
3726     M    = *newmat;
3727     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3728     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3729     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3730     /*
3731          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3732        rather than the slower MatSetValues().
3733     */
3734     M->was_assembled = PETSC_TRUE;
3735     M->assembled     = PETSC_FALSE;
3736   }
3737 
3738   /* (5) Set values of Msub to *newmat */
3739   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3740   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3741 
3742   jj   = aij->j;
3743   aa   = aij->a;
3744   for (i=0; i<m; i++) {
3745     row = rstart + i;
3746     nz  = ii[i+1] - ii[i];
3747     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3748     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3749     jj += nz; aa += nz;
3750   }
3751   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3752 
3753   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755 
3756   ierr = PetscFree(colsub);CHKERRQ(ierr);
3757 
3758   /* save Msub, iscol_sub and iscmap used in processor for next request */
3759   if (call ==  MAT_INITIAL_MATRIX) {
3760     *newmat = M;
3761     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3762     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3763 
3764     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3765     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3766 
3767     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3768     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3769 
3770     if (iscol_local) {
3771       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3772       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3773     }
3774   }
3775   PetscFunctionReturn(0);
3776 }
3777 
3778 /*
3779     Not great since it makes two copies of the submatrix, first an SeqAIJ
3780   in local and then by concatenating the local matrices the end result.
3781   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3782 
3783   Note: This requires a sequential iscol with all indices.
3784 */
3785 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3786 {
3787   PetscErrorCode ierr;
3788   PetscMPIInt    rank,size;
3789   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3790   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3791   Mat            M,Mreuse;
3792   MatScalar      *aa,*vwork;
3793   MPI_Comm       comm;
3794   Mat_SeqAIJ     *aij;
3795   PetscBool      colflag,allcolumns=PETSC_FALSE;
3796 
3797   PetscFunctionBegin;
3798   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3799   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3800   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3801 
3802   /* Check for special case: each processor gets entire matrix columns */
3803   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3804   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3805   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3806   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3807 
3808   if (call ==  MAT_REUSE_MATRIX) {
3809     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3810     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3811     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3812   } else {
3813     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3814   }
3815 
3816   /*
3817       m - number of local rows
3818       n - number of columns (same on all processors)
3819       rstart - first row in new global matrix generated
3820   */
3821   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3822   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3823   if (call == MAT_INITIAL_MATRIX) {
3824     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3825     ii  = aij->i;
3826     jj  = aij->j;
3827 
3828     /*
3829         Determine the number of non-zeros in the diagonal and off-diagonal
3830         portions of the matrix in order to do correct preallocation
3831     */
3832 
3833     /* first get start and end of "diagonal" columns */
3834     if (csize == PETSC_DECIDE) {
3835       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3836       if (mglobal == n) { /* square matrix */
3837         nlocal = m;
3838       } else {
3839         nlocal = n/size + ((n % size) > rank);
3840       }
3841     } else {
3842       nlocal = csize;
3843     }
3844     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3845     rstart = rend - nlocal;
3846     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3847 
3848     /* next, compute all the lengths */
3849     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3850     olens = dlens + m;
3851     for (i=0; i<m; i++) {
3852       jend = ii[i+1] - ii[i];
3853       olen = 0;
3854       dlen = 0;
3855       for (j=0; j<jend; j++) {
3856         if (*jj < rstart || *jj >= rend) olen++;
3857         else dlen++;
3858         jj++;
3859       }
3860       olens[i] = olen;
3861       dlens[i] = dlen;
3862     }
3863     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3864     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3865     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3866     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3867     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3868     ierr = PetscFree(dlens);CHKERRQ(ierr);
3869   } else {
3870     PetscInt ml,nl;
3871 
3872     M    = *newmat;
3873     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3874     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3875     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3876     /*
3877          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3878        rather than the slower MatSetValues().
3879     */
3880     M->was_assembled = PETSC_TRUE;
3881     M->assembled     = PETSC_FALSE;
3882   }
3883   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3884   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3885   ii   = aij->i;
3886   jj   = aij->j;
3887   aa   = aij->a;
3888   for (i=0; i<m; i++) {
3889     row   = rstart + i;
3890     nz    = ii[i+1] - ii[i];
3891     cwork = jj;     jj += nz;
3892     vwork = aa;     aa += nz;
3893     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3894   }
3895 
3896   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3897   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3898   *newmat = M;
3899 
3900   /* save submatrix used in processor for next request */
3901   if (call ==  MAT_INITIAL_MATRIX) {
3902     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3903     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3904   }
3905   PetscFunctionReturn(0);
3906 }
3907 
3908 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3909 {
3910   PetscInt       m,cstart, cend,j,nnz,i,d;
3911   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3912   const PetscInt *JJ;
3913   PetscErrorCode ierr;
3914   PetscBool      nooffprocentries;
3915 
3916   PetscFunctionBegin;
3917   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3918 
3919   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3920   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3921   m      = B->rmap->n;
3922   cstart = B->cmap->rstart;
3923   cend   = B->cmap->rend;
3924   rstart = B->rmap->rstart;
3925 
3926   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3927 
3928   if (PetscDefined(USE_DEBUG)) {
3929     for (i=0; i<m; i++) {
3930       nnz = Ii[i+1]- Ii[i];
3931       JJ  = J + Ii[i];
3932       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3933       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3934       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3935     }
3936   }
3937 
3938   for (i=0; i<m; i++) {
3939     nnz     = Ii[i+1]- Ii[i];
3940     JJ      = J + Ii[i];
3941     nnz_max = PetscMax(nnz_max,nnz);
3942     d       = 0;
3943     for (j=0; j<nnz; j++) {
3944       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3945     }
3946     d_nnz[i] = d;
3947     o_nnz[i] = nnz - d;
3948   }
3949   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3950   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3951 
3952   for (i=0; i<m; i++) {
3953     ii   = i + rstart;
3954     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3955   }
3956   nooffprocentries    = B->nooffprocentries;
3957   B->nooffprocentries = PETSC_TRUE;
3958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3960   B->nooffprocentries = nooffprocentries;
3961 
3962   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
3966 /*@
3967    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3968    (the default parallel PETSc format).
3969 
3970    Collective
3971 
3972    Input Parameters:
3973 +  B - the matrix
3974 .  i - the indices into j for the start of each local row (starts with zero)
3975 .  j - the column indices for each local row (starts with zero)
3976 -  v - optional values in the matrix
3977 
3978    Level: developer
3979 
3980    Notes:
3981        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3982      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3983      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3984 
3985        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3986 
3987        The format which is used for the sparse matrix input, is equivalent to a
3988     row-major ordering.. i.e for the following matrix, the input data expected is
3989     as shown
3990 
3991 $        1 0 0
3992 $        2 0 3     P0
3993 $       -------
3994 $        4 5 6     P1
3995 $
3996 $     Process0 [P0]: rows_owned=[0,1]
3997 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3998 $        j =  {0,0,2}  [size = 3]
3999 $        v =  {1,2,3}  [size = 3]
4000 $
4001 $     Process1 [P1]: rows_owned=[2]
4002 $        i =  {0,3}    [size = nrow+1  = 1+1]
4003 $        j =  {0,1,2}  [size = 3]
4004 $        v =  {4,5,6}  [size = 3]
4005 
4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4007           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4008 @*/
4009 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4010 {
4011   PetscErrorCode ierr;
4012 
4013   PetscFunctionBegin;
4014   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 /*@C
4019    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4020    (the default parallel PETSc format).  For good matrix assembly performance
4021    the user should preallocate the matrix storage by setting the parameters
4022    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4023    performance can be increased by more than a factor of 50.
4024 
4025    Collective
4026 
4027    Input Parameters:
4028 +  B - the matrix
4029 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4030            (same value is used for all local rows)
4031 .  d_nnz - array containing the number of nonzeros in the various rows of the
4032            DIAGONAL portion of the local submatrix (possibly different for each row)
4033            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4034            The size of this array is equal to the number of local rows, i.e 'm'.
4035            For matrices that will be factored, you must leave room for (and set)
4036            the diagonal entry even if it is zero.
4037 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4038            submatrix (same value is used for all local rows).
4039 -  o_nnz - array containing the number of nonzeros in the various rows of the
4040            OFF-DIAGONAL portion of the local submatrix (possibly different for
4041            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4042            structure. The size of this array is equal to the number
4043            of local rows, i.e 'm'.
4044 
4045    If the *_nnz parameter is given then the *_nz parameter is ignored
4046 
4047    The AIJ format (also called the Yale sparse matrix format or
4048    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4049    storage.  The stored row and column indices begin with zero.
4050    See Users-Manual: ch_mat for details.
4051 
4052    The parallel matrix is partitioned such that the first m0 rows belong to
4053    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4054    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4055 
4056    The DIAGONAL portion of the local submatrix of a processor can be defined
4057    as the submatrix which is obtained by extraction the part corresponding to
4058    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4059    first row that belongs to the processor, r2 is the last row belonging to
4060    the this processor, and c1-c2 is range of indices of the local part of a
4061    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4062    common case of a square matrix, the row and column ranges are the same and
4063    the DIAGONAL part is also square. The remaining portion of the local
4064    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4065 
4066    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4067 
4068    You can call MatGetInfo() to get information on how effective the preallocation was;
4069    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4070    You can also run with the option -info and look for messages with the string
4071    malloc in them to see if additional memory allocation was needed.
4072 
4073    Example usage:
4074 
4075    Consider the following 8x8 matrix with 34 non-zero values, that is
4076    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4077    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4078    as follows:
4079 
4080 .vb
4081             1  2  0  |  0  3  0  |  0  4
4082     Proc0   0  5  6  |  7  0  0  |  8  0
4083             9  0 10  | 11  0  0  | 12  0
4084     -------------------------------------
4085            13  0 14  | 15 16 17  |  0  0
4086     Proc1   0 18  0  | 19 20 21  |  0  0
4087             0  0  0  | 22 23  0  | 24  0
4088     -------------------------------------
4089     Proc2  25 26 27  |  0  0 28  | 29  0
4090            30  0  0  | 31 32 33  |  0 34
4091 .ve
4092 
4093    This can be represented as a collection of submatrices as:
4094 
4095 .vb
4096       A B C
4097       D E F
4098       G H I
4099 .ve
4100 
4101    Where the submatrices A,B,C are owned by proc0, D,E,F are
4102    owned by proc1, G,H,I are owned by proc2.
4103 
4104    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4106    The 'M','N' parameters are 8,8, and have the same values on all procs.
4107 
4108    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4109    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4110    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4111    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4112    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4113    matrix, ans [DF] as another SeqAIJ matrix.
4114 
4115    When d_nz, o_nz parameters are specified, d_nz storage elements are
4116    allocated for every row of the local diagonal submatrix, and o_nz
4117    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4118    One way to choose d_nz and o_nz is to use the max nonzerors per local
4119    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4120    In this case, the values of d_nz,o_nz are:
4121 .vb
4122      proc0 : dnz = 2, o_nz = 2
4123      proc1 : dnz = 3, o_nz = 2
4124      proc2 : dnz = 1, o_nz = 4
4125 .ve
4126    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4127    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4128    for proc3. i.e we are using 12+15+10=37 storage locations to store
4129    34 values.
4130 
4131    When d_nnz, o_nnz parameters are specified, the storage is specified
4132    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4133    In the above case the values for d_nnz,o_nnz are:
4134 .vb
4135      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4136      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4137      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4138 .ve
4139    Here the space allocated is sum of all the above values i.e 34, and
4140    hence pre-allocation is perfect.
4141 
4142    Level: intermediate
4143 
4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4145           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4146 @*/
4147 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4148 {
4149   PetscErrorCode ierr;
4150 
4151   PetscFunctionBegin;
4152   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4153   PetscValidType(B,1);
4154   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 /*@
4159      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4160          CSR format for the local rows.
4161 
4162    Collective
4163 
4164    Input Parameters:
4165 +  comm - MPI communicator
4166 .  m - number of local rows (Cannot be PETSC_DECIDE)
4167 .  n - This value should be the same as the local size used in creating the
4168        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4169        calculated if N is given) For square matrices n is almost always m.
4170 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4171 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4172 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4173 .   j - column indices
4174 -   a - matrix values
4175 
4176    Output Parameter:
4177 .   mat - the matrix
4178 
4179    Level: intermediate
4180 
4181    Notes:
4182        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4183      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4184      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4185 
4186        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4187 
4188        The format which is used for the sparse matrix input, is equivalent to a
4189     row-major ordering.. i.e for the following matrix, the input data expected is
4190     as shown
4191 
4192        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4193 
4194 $        1 0 0
4195 $        2 0 3     P0
4196 $       -------
4197 $        4 5 6     P1
4198 $
4199 $     Process0 [P0]: rows_owned=[0,1]
4200 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4201 $        j =  {0,0,2}  [size = 3]
4202 $        v =  {1,2,3}  [size = 3]
4203 $
4204 $     Process1 [P1]: rows_owned=[2]
4205 $        i =  {0,3}    [size = nrow+1  = 1+1]
4206 $        j =  {0,1,2}  [size = 3]
4207 $        v =  {4,5,6}  [size = 3]
4208 
4209 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4210           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4211 @*/
4212 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4213 {
4214   PetscErrorCode ierr;
4215 
4216   PetscFunctionBegin;
4217   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4218   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4219   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4220   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4221   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4222   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4223   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 /*@
4228      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4229          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4230 
4231    Collective
4232 
4233    Input Parameters:
4234 +  mat - the matrix
4235 .  m - number of local rows (Cannot be PETSC_DECIDE)
4236 .  n - This value should be the same as the local size used in creating the
4237        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4238        calculated if N is given) For square matrices n is almost always m.
4239 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4240 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4241 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4242 .  J - column indices
4243 -  v - matrix values
4244 
4245    Level: intermediate
4246 
4247 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4248           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4249 @*/
4250 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4251 {
4252   PetscErrorCode ierr;
4253   PetscInt       cstart,nnz,i,j;
4254   PetscInt       *ld;
4255   PetscBool      nooffprocentries;
4256   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4257   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4258   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4259   const PetscInt *Adi = Ad->i;
4260   PetscInt       ldi,Iii,md;
4261 
4262   PetscFunctionBegin;
4263   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4264   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4265   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4266   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4267 
4268   cstart = mat->cmap->rstart;
4269   if (!Aij->ld) {
4270     /* count number of entries below block diagonal */
4271     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4272     Aij->ld = ld;
4273     for (i=0; i<m; i++) {
4274       nnz  = Ii[i+1]- Ii[i];
4275       j     = 0;
4276       while  (J[j] < cstart && j < nnz) {j++;}
4277       J    += nnz;
4278       ld[i] = j;
4279     }
4280   } else {
4281     ld = Aij->ld;
4282   }
4283 
4284   for (i=0; i<m; i++) {
4285     nnz  = Ii[i+1]- Ii[i];
4286     Iii  = Ii[i];
4287     ldi  = ld[i];
4288     md   = Adi[i+1]-Adi[i];
4289     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4290     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4291     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4292     ad  += md;
4293     ao  += nnz - md;
4294   }
4295   nooffprocentries      = mat->nooffprocentries;
4296   mat->nooffprocentries = PETSC_TRUE;
4297   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4298   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4299   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4300   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4301   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4302   mat->nooffprocentries = nooffprocentries;
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 /*@C
4307    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4308    (the default parallel PETSc format).  For good matrix assembly performance
4309    the user should preallocate the matrix storage by setting the parameters
4310    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4311    performance can be increased by more than a factor of 50.
4312 
4313    Collective
4314 
4315    Input Parameters:
4316 +  comm - MPI communicator
4317 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4318            This value should be the same as the local size used in creating the
4319            y vector for the matrix-vector product y = Ax.
4320 .  n - This value should be the same as the local size used in creating the
4321        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4322        calculated if N is given) For square matrices n is almost always m.
4323 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4324 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4325 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4326            (same value is used for all local rows)
4327 .  d_nnz - array containing the number of nonzeros in the various rows of the
4328            DIAGONAL portion of the local submatrix (possibly different for each row)
4329            or NULL, if d_nz is used to specify the nonzero structure.
4330            The size of this array is equal to the number of local rows, i.e 'm'.
4331 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4332            submatrix (same value is used for all local rows).
4333 -  o_nnz - array containing the number of nonzeros in the various rows of the
4334            OFF-DIAGONAL portion of the local submatrix (possibly different for
4335            each row) or NULL, if o_nz is used to specify the nonzero
4336            structure. The size of this array is equal to the number
4337            of local rows, i.e 'm'.
4338 
4339    Output Parameter:
4340 .  A - the matrix
4341 
4342    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4343    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4344    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4345 
4346    Notes:
4347    If the *_nnz parameter is given then the *_nz parameter is ignored
4348 
4349    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4350    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4351    storage requirements for this matrix.
4352 
4353    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4354    processor than it must be used on all processors that share the object for
4355    that argument.
4356 
4357    The user MUST specify either the local or global matrix dimensions
4358    (possibly both).
4359 
4360    The parallel matrix is partitioned across processors such that the
4361    first m0 rows belong to process 0, the next m1 rows belong to
4362    process 1, the next m2 rows belong to process 2 etc.. where
4363    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4364    values corresponding to [m x N] submatrix.
4365 
4366    The columns are logically partitioned with the n0 columns belonging
4367    to 0th partition, the next n1 columns belonging to the next
4368    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4369 
4370    The DIAGONAL portion of the local submatrix on any given processor
4371    is the submatrix corresponding to the rows and columns m,n
4372    corresponding to the given processor. i.e diagonal matrix on
4373    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4374    etc. The remaining portion of the local submatrix [m x (N-n)]
4375    constitute the OFF-DIAGONAL portion. The example below better
4376    illustrates this concept.
4377 
4378    For a square global matrix we define each processor's diagonal portion
4379    to be its local rows and the corresponding columns (a square submatrix);
4380    each processor's off-diagonal portion encompasses the remainder of the
4381    local matrix (a rectangular submatrix).
4382 
4383    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4384 
4385    When calling this routine with a single process communicator, a matrix of
4386    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4387    type of communicator, use the construction mechanism
4388 .vb
4389      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4390 .ve
4391 
4392 $     MatCreate(...,&A);
4393 $     MatSetType(A,MATMPIAIJ);
4394 $     MatSetSizes(A, m,n,M,N);
4395 $     MatMPIAIJSetPreallocation(A,...);
4396 
4397    By default, this format uses inodes (identical nodes) when possible.
4398    We search for consecutive rows with the same nonzero structure, thereby
4399    reusing matrix information to achieve increased efficiency.
4400 
4401    Options Database Keys:
4402 +  -mat_no_inode  - Do not use inodes
4403 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4404 
4405 
4406 
4407    Example usage:
4408 
4409    Consider the following 8x8 matrix with 34 non-zero values, that is
4410    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4411    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4412    as follows
4413 
4414 .vb
4415             1  2  0  |  0  3  0  |  0  4
4416     Proc0   0  5  6  |  7  0  0  |  8  0
4417             9  0 10  | 11  0  0  | 12  0
4418     -------------------------------------
4419            13  0 14  | 15 16 17  |  0  0
4420     Proc1   0 18  0  | 19 20 21  |  0  0
4421             0  0  0  | 22 23  0  | 24  0
4422     -------------------------------------
4423     Proc2  25 26 27  |  0  0 28  | 29  0
4424            30  0  0  | 31 32 33  |  0 34
4425 .ve
4426 
4427    This can be represented as a collection of submatrices as
4428 
4429 .vb
4430       A B C
4431       D E F
4432       G H I
4433 .ve
4434 
4435    Where the submatrices A,B,C are owned by proc0, D,E,F are
4436    owned by proc1, G,H,I are owned by proc2.
4437 
4438    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4439    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4440    The 'M','N' parameters are 8,8, and have the same values on all procs.
4441 
4442    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4443    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4444    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4445    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4446    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4447    matrix, ans [DF] as another SeqAIJ matrix.
4448 
4449    When d_nz, o_nz parameters are specified, d_nz storage elements are
4450    allocated for every row of the local diagonal submatrix, and o_nz
4451    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4452    One way to choose d_nz and o_nz is to use the max nonzerors per local
4453    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4454    In this case, the values of d_nz,o_nz are
4455 .vb
4456      proc0 : dnz = 2, o_nz = 2
4457      proc1 : dnz = 3, o_nz = 2
4458      proc2 : dnz = 1, o_nz = 4
4459 .ve
4460    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4461    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4462    for proc3. i.e we are using 12+15+10=37 storage locations to store
4463    34 values.
4464 
4465    When d_nnz, o_nnz parameters are specified, the storage is specified
4466    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4467    In the above case the values for d_nnz,o_nnz are
4468 .vb
4469      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4470      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4471      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4472 .ve
4473    Here the space allocated is sum of all the above values i.e 34, and
4474    hence pre-allocation is perfect.
4475 
4476    Level: intermediate
4477 
4478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4479           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4480 @*/
4481 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4482 {
4483   PetscErrorCode ierr;
4484   PetscMPIInt    size;
4485 
4486   PetscFunctionBegin;
4487   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4488   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4489   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4490   if (size > 1) {
4491     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4492     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4493   } else {
4494     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4495     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4496   }
4497   PetscFunctionReturn(0);
4498 }
4499 
4500 /*@C
4501   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4502 
4503   Not collective
4504 
4505   Input Parameter:
4506 . A - The MPIAIJ matrix
4507 
4508   Output Parameters:
4509 + Ad - The local diagonal block as a SeqAIJ matrix
4510 . Ao - The local off-diagonal block as a SeqAIJ matrix
4511 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4512 
4513   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4514   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4515   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4516   local column numbers to global column numbers in the original matrix.
4517 
4518   Level: intermediate
4519 
4520 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4521 @*/
4522 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4523 {
4524   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4525   PetscBool      flg;
4526   PetscErrorCode ierr;
4527 
4528   PetscFunctionBegin;
4529   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4530   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4531   if (Ad)     *Ad     = a->A;
4532   if (Ao)     *Ao     = a->B;
4533   if (colmap) *colmap = a->garray;
4534   PetscFunctionReturn(0);
4535 }
4536 
4537 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4538 {
4539   PetscErrorCode ierr;
4540   PetscInt       m,N,i,rstart,nnz,Ii;
4541   PetscInt       *indx;
4542   PetscScalar    *values;
4543 
4544   PetscFunctionBegin;
4545   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4546   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4547     PetscInt       *dnz,*onz,sum,bs,cbs;
4548 
4549     if (n == PETSC_DECIDE) {
4550       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4551     }
4552     /* Check sum(n) = N */
4553     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4554     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4555 
4556     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4557     rstart -= m;
4558 
4559     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4560     for (i=0; i<m; i++) {
4561       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4562       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4563       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4564     }
4565 
4566     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4567     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4568     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4569     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4570     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4571     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4572     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4573     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4574   }
4575 
4576   /* numeric phase */
4577   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4578   for (i=0; i<m; i++) {
4579     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4580     Ii   = i + rstart;
4581     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4582     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4583   }
4584   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4585   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4590 {
4591   PetscErrorCode    ierr;
4592   PetscMPIInt       rank;
4593   PetscInt          m,N,i,rstart,nnz;
4594   size_t            len;
4595   const PetscInt    *indx;
4596   PetscViewer       out;
4597   char              *name;
4598   Mat               B;
4599   const PetscScalar *values;
4600 
4601   PetscFunctionBegin;
4602   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4603   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4604   /* Should this be the type of the diagonal block of A? */
4605   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4606   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4607   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4608   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4609   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4610   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4611   for (i=0; i<m; i++) {
4612     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4613     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4614     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4615   }
4616   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4618 
4619   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4620   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4621   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4622   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4623   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4624   ierr = PetscFree(name);CHKERRQ(ierr);
4625   ierr = MatView(B,out);CHKERRQ(ierr);
4626   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4627   ierr = MatDestroy(&B);CHKERRQ(ierr);
4628   PetscFunctionReturn(0);
4629 }
4630 
4631 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4632 {
4633   PetscErrorCode      ierr;
4634   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4635 
4636   PetscFunctionBegin;
4637   if (!merge) PetscFunctionReturn(0);
4638   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4639   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4640   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4641   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4642   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4643   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4644   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4645   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4646   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4647   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4648   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4649   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4650   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4651   ierr = PetscFree(merge);CHKERRQ(ierr);
4652   PetscFunctionReturn(0);
4653 }
4654 
4655 #include <../src/mat/utils/freespace.h>
4656 #include <petscbt.h>
4657 
4658 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4659 {
4660   PetscErrorCode      ierr;
4661   MPI_Comm            comm;
4662   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4663   PetscMPIInt         size,rank,taga,*len_s;
4664   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4665   PetscInt            proc,m;
4666   PetscInt            **buf_ri,**buf_rj;
4667   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4668   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4669   MPI_Request         *s_waits,*r_waits;
4670   MPI_Status          *status;
4671   MatScalar           *aa=a->a;
4672   MatScalar           **abuf_r,*ba_i;
4673   Mat_Merge_SeqsToMPI *merge;
4674   PetscContainer      container;
4675 
4676   PetscFunctionBegin;
4677   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4678   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4679 
4680   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4681   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4682 
4683   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4684   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4685   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4686 
4687   bi     = merge->bi;
4688   bj     = merge->bj;
4689   buf_ri = merge->buf_ri;
4690   buf_rj = merge->buf_rj;
4691 
4692   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4693   owners = merge->rowmap->range;
4694   len_s  = merge->len_s;
4695 
4696   /* send and recv matrix values */
4697   /*-----------------------------*/
4698   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4699   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4700 
4701   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4702   for (proc=0,k=0; proc<size; proc++) {
4703     if (!len_s[proc]) continue;
4704     i    = owners[proc];
4705     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4706     k++;
4707   }
4708 
4709   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4710   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4711   ierr = PetscFree(status);CHKERRQ(ierr);
4712 
4713   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4714   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4715 
4716   /* insert mat values of mpimat */
4717   /*----------------------------*/
4718   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4719   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4720 
4721   for (k=0; k<merge->nrecv; k++) {
4722     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4723     nrows       = *(buf_ri_k[k]);
4724     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4725     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4726   }
4727 
4728   /* set values of ba */
4729   m = merge->rowmap->n;
4730   for (i=0; i<m; i++) {
4731     arow = owners[rank] + i;
4732     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4733     bnzi = bi[i+1] - bi[i];
4734     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4735 
4736     /* add local non-zero vals of this proc's seqmat into ba */
4737     anzi   = ai[arow+1] - ai[arow];
4738     aj     = a->j + ai[arow];
4739     aa     = a->a + ai[arow];
4740     nextaj = 0;
4741     for (j=0; nextaj<anzi; j++) {
4742       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4743         ba_i[j] += aa[nextaj++];
4744       }
4745     }
4746 
4747     /* add received vals into ba */
4748     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4749       /* i-th row */
4750       if (i == *nextrow[k]) {
4751         anzi   = *(nextai[k]+1) - *nextai[k];
4752         aj     = buf_rj[k] + *(nextai[k]);
4753         aa     = abuf_r[k] + *(nextai[k]);
4754         nextaj = 0;
4755         for (j=0; nextaj<anzi; j++) {
4756           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4757             ba_i[j] += aa[nextaj++];
4758           }
4759         }
4760         nextrow[k]++; nextai[k]++;
4761       }
4762     }
4763     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4764   }
4765   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4766   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4767 
4768   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4769   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4770   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4771   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4772   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4773   PetscFunctionReturn(0);
4774 }
4775 
4776 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4777 {
4778   PetscErrorCode      ierr;
4779   Mat                 B_mpi;
4780   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4781   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4782   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4783   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4784   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4785   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4786   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4787   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4788   MPI_Status          *status;
4789   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4790   PetscBT             lnkbt;
4791   Mat_Merge_SeqsToMPI *merge;
4792   PetscContainer      container;
4793 
4794   PetscFunctionBegin;
4795   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4796 
4797   /* make sure it is a PETSc comm */
4798   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4799   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4800   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4801 
4802   ierr = PetscNew(&merge);CHKERRQ(ierr);
4803   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4804 
4805   /* determine row ownership */
4806   /*---------------------------------------------------------*/
4807   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4808   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4809   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4810   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4811   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4812   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4813   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4814 
4815   m      = merge->rowmap->n;
4816   owners = merge->rowmap->range;
4817 
4818   /* determine the number of messages to send, their lengths */
4819   /*---------------------------------------------------------*/
4820   len_s = merge->len_s;
4821 
4822   len          = 0; /* length of buf_si[] */
4823   merge->nsend = 0;
4824   for (proc=0; proc<size; proc++) {
4825     len_si[proc] = 0;
4826     if (proc == rank) {
4827       len_s[proc] = 0;
4828     } else {
4829       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4830       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4831     }
4832     if (len_s[proc]) {
4833       merge->nsend++;
4834       nrows = 0;
4835       for (i=owners[proc]; i<owners[proc+1]; i++) {
4836         if (ai[i+1] > ai[i]) nrows++;
4837       }
4838       len_si[proc] = 2*(nrows+1);
4839       len         += len_si[proc];
4840     }
4841   }
4842 
4843   /* determine the number and length of messages to receive for ij-structure */
4844   /*-------------------------------------------------------------------------*/
4845   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4846   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4847 
4848   /* post the Irecv of j-structure */
4849   /*-------------------------------*/
4850   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4851   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4852 
4853   /* post the Isend of j-structure */
4854   /*--------------------------------*/
4855   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4856 
4857   for (proc=0, k=0; proc<size; proc++) {
4858     if (!len_s[proc]) continue;
4859     i    = owners[proc];
4860     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4861     k++;
4862   }
4863 
4864   /* receives and sends of j-structure are complete */
4865   /*------------------------------------------------*/
4866   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4867   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4868 
4869   /* send and recv i-structure */
4870   /*---------------------------*/
4871   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4872   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4873 
4874   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4875   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4876   for (proc=0,k=0; proc<size; proc++) {
4877     if (!len_s[proc]) continue;
4878     /* form outgoing message for i-structure:
4879          buf_si[0]:                 nrows to be sent
4880                [1:nrows]:           row index (global)
4881                [nrows+1:2*nrows+1]: i-structure index
4882     */
4883     /*-------------------------------------------*/
4884     nrows       = len_si[proc]/2 - 1;
4885     buf_si_i    = buf_si + nrows+1;
4886     buf_si[0]   = nrows;
4887     buf_si_i[0] = 0;
4888     nrows       = 0;
4889     for (i=owners[proc]; i<owners[proc+1]; i++) {
4890       anzi = ai[i+1] - ai[i];
4891       if (anzi) {
4892         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4893         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4894         nrows++;
4895       }
4896     }
4897     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4898     k++;
4899     buf_si += len_si[proc];
4900   }
4901 
4902   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4903   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4904 
4905   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4906   for (i=0; i<merge->nrecv; i++) {
4907     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4908   }
4909 
4910   ierr = PetscFree(len_si);CHKERRQ(ierr);
4911   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4912   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4913   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4914   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4915   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4916   ierr = PetscFree(status);CHKERRQ(ierr);
4917 
4918   /* compute a local seq matrix in each processor */
4919   /*----------------------------------------------*/
4920   /* allocate bi array and free space for accumulating nonzero column info */
4921   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4922   bi[0] = 0;
4923 
4924   /* create and initialize a linked list */
4925   nlnk = N+1;
4926   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4927 
4928   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4929   len  = ai[owners[rank+1]] - ai[owners[rank]];
4930   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4931 
4932   current_space = free_space;
4933 
4934   /* determine symbolic info for each local row */
4935   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4936 
4937   for (k=0; k<merge->nrecv; k++) {
4938     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4939     nrows       = *buf_ri_k[k];
4940     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4941     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4942   }
4943 
4944   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4945   len  = 0;
4946   for (i=0; i<m; i++) {
4947     bnzi = 0;
4948     /* add local non-zero cols of this proc's seqmat into lnk */
4949     arow  = owners[rank] + i;
4950     anzi  = ai[arow+1] - ai[arow];
4951     aj    = a->j + ai[arow];
4952     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4953     bnzi += nlnk;
4954     /* add received col data into lnk */
4955     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4956       if (i == *nextrow[k]) { /* i-th row */
4957         anzi  = *(nextai[k]+1) - *nextai[k];
4958         aj    = buf_rj[k] + *nextai[k];
4959         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4960         bnzi += nlnk;
4961         nextrow[k]++; nextai[k]++;
4962       }
4963     }
4964     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4965 
4966     /* if free space is not available, make more free space */
4967     if (current_space->local_remaining<bnzi) {
4968       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4969       nspacedouble++;
4970     }
4971     /* copy data into free space, then initialize lnk */
4972     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4973     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4974 
4975     current_space->array           += bnzi;
4976     current_space->local_used      += bnzi;
4977     current_space->local_remaining -= bnzi;
4978 
4979     bi[i+1] = bi[i] + bnzi;
4980   }
4981 
4982   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4983 
4984   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4985   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4986   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4987 
4988   /* create symbolic parallel matrix B_mpi */
4989   /*---------------------------------------*/
4990   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4991   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4992   if (n==PETSC_DECIDE) {
4993     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4994   } else {
4995     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4996   }
4997   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4998   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4999   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5000   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5001   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5002 
5003   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5004   B_mpi->assembled  = PETSC_FALSE;
5005   merge->bi         = bi;
5006   merge->bj         = bj;
5007   merge->buf_ri     = buf_ri;
5008   merge->buf_rj     = buf_rj;
5009   merge->coi        = NULL;
5010   merge->coj        = NULL;
5011   merge->owners_co  = NULL;
5012 
5013   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5014 
5015   /* attach the supporting struct to B_mpi for reuse */
5016   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5017   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5018   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5019   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5020   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5021   *mpimat = B_mpi;
5022 
5023   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5024   PetscFunctionReturn(0);
5025 }
5026 
5027 /*@C
5028       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5029                  matrices from each processor
5030 
5031     Collective
5032 
5033    Input Parameters:
5034 +    comm - the communicators the parallel matrix will live on
5035 .    seqmat - the input sequential matrices
5036 .    m - number of local rows (or PETSC_DECIDE)
5037 .    n - number of local columns (or PETSC_DECIDE)
5038 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5039 
5040    Output Parameter:
5041 .    mpimat - the parallel matrix generated
5042 
5043     Level: advanced
5044 
5045    Notes:
5046      The dimensions of the sequential matrix in each processor MUST be the same.
5047      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5048      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5049 @*/
5050 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5051 {
5052   PetscErrorCode ierr;
5053   PetscMPIInt    size;
5054 
5055   PetscFunctionBegin;
5056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5057   if (size == 1) {
5058     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5059     if (scall == MAT_INITIAL_MATRIX) {
5060       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5061     } else {
5062       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5063     }
5064     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5065     PetscFunctionReturn(0);
5066   }
5067   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5068   if (scall == MAT_INITIAL_MATRIX) {
5069     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5070   }
5071   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5072   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5073   PetscFunctionReturn(0);
5074 }
5075 
5076 /*@
5077      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5078           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5079           with MatGetSize()
5080 
5081     Not Collective
5082 
5083    Input Parameters:
5084 +    A - the matrix
5085 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5086 
5087    Output Parameter:
5088 .    A_loc - the local sequential matrix generated
5089 
5090     Level: developer
5091 
5092    Notes:
5093      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5094      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5095      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5096      modify the values of the returned A_loc.
5097 
5098 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5099 
5100 @*/
5101 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5102 {
5103   PetscErrorCode ierr;
5104   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5105   Mat_SeqAIJ     *mat,*a,*b;
5106   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5107   MatScalar      *aa,*ba,*cam;
5108   PetscScalar    *ca;
5109   PetscMPIInt    size;
5110   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5111   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5112   PetscBool      match;
5113 
5114   PetscFunctionBegin;
5115   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5116   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5117   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5118   if (size == 1) {
5119     if (scall == MAT_INITIAL_MATRIX) {
5120       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5121       *A_loc = mpimat->A;
5122     } else if (scall == MAT_REUSE_MATRIX) {
5123       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5124     }
5125     PetscFunctionReturn(0);
5126   }
5127 
5128   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5129   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5130   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5131   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5132   aa = a->a; ba = b->a;
5133   if (scall == MAT_INITIAL_MATRIX) {
5134     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5135     ci[0] = 0;
5136     for (i=0; i<am; i++) {
5137       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5138     }
5139     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5140     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5141     k    = 0;
5142     for (i=0; i<am; i++) {
5143       ncols_o = bi[i+1] - bi[i];
5144       ncols_d = ai[i+1] - ai[i];
5145       /* off-diagonal portion of A */
5146       for (jo=0; jo<ncols_o; jo++) {
5147         col = cmap[*bj];
5148         if (col >= cstart) break;
5149         cj[k]   = col; bj++;
5150         ca[k++] = *ba++;
5151       }
5152       /* diagonal portion of A */
5153       for (j=0; j<ncols_d; j++) {
5154         cj[k]   = cstart + *aj++;
5155         ca[k++] = *aa++;
5156       }
5157       /* off-diagonal portion of A */
5158       for (j=jo; j<ncols_o; j++) {
5159         cj[k]   = cmap[*bj++];
5160         ca[k++] = *ba++;
5161       }
5162     }
5163     /* put together the new matrix */
5164     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5165     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5166     /* Since these are PETSc arrays, change flags to free them as necessary. */
5167     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5168     mat->free_a  = PETSC_TRUE;
5169     mat->free_ij = PETSC_TRUE;
5170     mat->nonew   = 0;
5171   } else if (scall == MAT_REUSE_MATRIX) {
5172     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5173     ci = mat->i; cj = mat->j; cam = mat->a;
5174     for (i=0; i<am; i++) {
5175       /* off-diagonal portion of A */
5176       ncols_o = bi[i+1] - bi[i];
5177       for (jo=0; jo<ncols_o; jo++) {
5178         col = cmap[*bj];
5179         if (col >= cstart) break;
5180         *cam++ = *ba++; bj++;
5181       }
5182       /* diagonal portion of A */
5183       ncols_d = ai[i+1] - ai[i];
5184       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5185       /* off-diagonal portion of A */
5186       for (j=jo; j<ncols_o; j++) {
5187         *cam++ = *ba++; bj++;
5188       }
5189     }
5190   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5191   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5192   PetscFunctionReturn(0);
5193 }
5194 
5195 /*@C
5196      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5197 
5198     Not Collective
5199 
5200    Input Parameters:
5201 +    A - the matrix
5202 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5203 -    row, col - index sets of rows and columns to extract (or NULL)
5204 
5205    Output Parameter:
5206 .    A_loc - the local sequential matrix generated
5207 
5208     Level: developer
5209 
5210 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5211 
5212 @*/
5213 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5214 {
5215   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5216   PetscErrorCode ierr;
5217   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5218   IS             isrowa,iscola;
5219   Mat            *aloc;
5220   PetscBool      match;
5221 
5222   PetscFunctionBegin;
5223   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5224   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5225   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5226   if (!row) {
5227     start = A->rmap->rstart; end = A->rmap->rend;
5228     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5229   } else {
5230     isrowa = *row;
5231   }
5232   if (!col) {
5233     start = A->cmap->rstart;
5234     cmap  = a->garray;
5235     nzA   = a->A->cmap->n;
5236     nzB   = a->B->cmap->n;
5237     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5238     ncols = 0;
5239     for (i=0; i<nzB; i++) {
5240       if (cmap[i] < start) idx[ncols++] = cmap[i];
5241       else break;
5242     }
5243     imark = i;
5244     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5245     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5246     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5247   } else {
5248     iscola = *col;
5249   }
5250   if (scall != MAT_INITIAL_MATRIX) {
5251     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5252     aloc[0] = *A_loc;
5253   }
5254   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5255   if (!col) { /* attach global id of condensed columns */
5256     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5257   }
5258   *A_loc = aloc[0];
5259   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5260   if (!row) {
5261     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5262   }
5263   if (!col) {
5264     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5265   }
5266   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5267   PetscFunctionReturn(0);
5268 }
5269 
5270 /*
5271  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5272  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5273  * on a global size.
5274  * */
5275 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5276 {
5277   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5278   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5279   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5280   PetscMPIInt              owner;
5281   PetscSFNode              *iremote,*oiremote;
5282   const PetscInt           *lrowindices;
5283   PetscErrorCode           ierr;
5284   PetscSF                  sf,osf;
5285   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5286   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5287   MPI_Comm                 comm;
5288   ISLocalToGlobalMapping   mapping;
5289 
5290   PetscFunctionBegin;
5291   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5292   /* plocalsize is the number of roots
5293    * nrows is the number of leaves
5294    * */
5295   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5296   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5297   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5298   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5299   for (i=0;i<nrows;i++) {
5300     /* Find a remote index and an owner for a row
5301      * The row could be local or remote
5302      * */
5303     owner = 0;
5304     lidx  = 0;
5305     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5306     iremote[i].index = lidx;
5307     iremote[i].rank  = owner;
5308   }
5309   /* Create SF to communicate how many nonzero columns for each row */
5310   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5311   /* SF will figure out the number of nonzero colunms for each row, and their
5312    * offsets
5313    * */
5314   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5315   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5316   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5317 
5318   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5319   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5320   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5321   roffsets[0] = 0;
5322   roffsets[1] = 0;
5323   for (i=0;i<plocalsize;i++) {
5324     /* diag */
5325     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5326     /* off diag */
5327     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5328     /* compute offsets so that we relative location for each row */
5329     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5330     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5331   }
5332   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5333   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5334   /* 'r' means root, and 'l' means leaf */
5335   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5336   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5337   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5338   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5339   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5340   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5341   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5342   dntotalcols = 0;
5343   ontotalcols = 0;
5344   ncol = 0;
5345   for (i=0;i<nrows;i++) {
5346     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5347     ncol = PetscMax(pnnz[i],ncol);
5348     /* diag */
5349     dntotalcols += nlcols[i*2+0];
5350     /* off diag */
5351     ontotalcols += nlcols[i*2+1];
5352   }
5353   /* We do not need to figure the right number of columns
5354    * since all the calculations will be done by going through the raw data
5355    * */
5356   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5357   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5358   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5359   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5360   /* diag */
5361   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5362   /* off diag */
5363   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5364   /* diag */
5365   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5366   /* off diag */
5367   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5368   dntotalcols = 0;
5369   ontotalcols = 0;
5370   ntotalcols  = 0;
5371   for (i=0;i<nrows;i++) {
5372     owner = 0;
5373     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5374     /* Set iremote for diag matrix */
5375     for (j=0;j<nlcols[i*2+0];j++) {
5376       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5377       iremote[dntotalcols].rank    = owner;
5378       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5379       ilocal[dntotalcols++]        = ntotalcols++;
5380     }
5381     /* off diag */
5382     for (j=0;j<nlcols[i*2+1];j++) {
5383       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5384       oiremote[ontotalcols].rank    = owner;
5385       oilocal[ontotalcols++]        = ntotalcols++;
5386     }
5387   }
5388   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5389   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5390   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5391   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5392   /* P serves as roots and P_oth is leaves
5393    * Diag matrix
5394    * */
5395   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5396   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5397   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5398 
5399   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5400   /* Off diag */
5401   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5402   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5403   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5404   /* We operate on the matrix internal data for saving memory */
5405   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5406   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5407   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5408   /* Convert to global indices for diag matrix */
5409   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5410   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5411   /* We want P_oth store global indices */
5412   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5413   /* Use memory scalable approach */
5414   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5415   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5416   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5417   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5418   /* Convert back to local indices */
5419   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5420   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5421   nout = 0;
5422   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5423   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5424   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5425   /* Exchange values */
5426   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5427   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5428   /* Stop PETSc from shrinking memory */
5429   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5430   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5431   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5432   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5433   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5434   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5435   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5436   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5437   PetscFunctionReturn(0);
5438 }
5439 
5440 /*
5441  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5442  * This supports MPIAIJ and MAIJ
5443  * */
5444 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5445 {
5446   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5447   Mat_SeqAIJ            *p_oth;
5448   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5449   IS                    rows,map;
5450   PetscHMapI            hamp;
5451   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5452   MPI_Comm              comm;
5453   PetscSF               sf,osf;
5454   PetscBool             has;
5455   PetscErrorCode        ierr;
5456 
5457   PetscFunctionBegin;
5458   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5459   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5460   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5461    *  and then create a submatrix (that often is an overlapping matrix)
5462    * */
5463   if (reuse == MAT_INITIAL_MATRIX) {
5464     /* Use a hash table to figure out unique keys */
5465     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5466     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5467     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5468     count = 0;
5469     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5470     for (i=0;i<a->B->cmap->n;i++) {
5471       key  = a->garray[i]/dof;
5472       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5473       if (!has) {
5474         mapping[i] = count;
5475         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5476       } else {
5477         /* Current 'i' has the same value the previous step */
5478         mapping[i] = count-1;
5479       }
5480     }
5481     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5482     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5483     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5484     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5485     off = 0;
5486     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5487     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5488     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5489     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5490     /* In case, the matrix was already created but users want to recreate the matrix */
5491     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5492     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5493     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5494     ierr = ISDestroy(&map);CHKERRQ(ierr);
5495     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5496   } else if (reuse == MAT_REUSE_MATRIX) {
5497     /* If matrix was already created, we simply update values using SF objects
5498      * that as attached to the matrix ealier.
5499      *  */
5500     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5501     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5502     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5503     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5504     /* Update values in place */
5505     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5506     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5507     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5508     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5509   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5510   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5511   PetscFunctionReturn(0);
5512 }
5513 
5514 /*@C
5515     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5516 
5517     Collective on Mat
5518 
5519    Input Parameters:
5520 +    A,B - the matrices in mpiaij format
5521 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5522 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5523 
5524    Output Parameter:
5525 +    rowb, colb - index sets of rows and columns of B to extract
5526 -    B_seq - the sequential matrix generated
5527 
5528     Level: developer
5529 
5530 @*/
5531 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5532 {
5533   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5534   PetscErrorCode ierr;
5535   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5536   IS             isrowb,iscolb;
5537   Mat            *bseq=NULL;
5538 
5539   PetscFunctionBegin;
5540   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5541     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5542   }
5543   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5544 
5545   if (scall == MAT_INITIAL_MATRIX) {
5546     start = A->cmap->rstart;
5547     cmap  = a->garray;
5548     nzA   = a->A->cmap->n;
5549     nzB   = a->B->cmap->n;
5550     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5551     ncols = 0;
5552     for (i=0; i<nzB; i++) {  /* row < local row index */
5553       if (cmap[i] < start) idx[ncols++] = cmap[i];
5554       else break;
5555     }
5556     imark = i;
5557     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5558     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5559     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5560     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5561   } else {
5562     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5563     isrowb  = *rowb; iscolb = *colb;
5564     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5565     bseq[0] = *B_seq;
5566   }
5567   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5568   *B_seq = bseq[0];
5569   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5570   if (!rowb) {
5571     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5572   } else {
5573     *rowb = isrowb;
5574   }
5575   if (!colb) {
5576     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5577   } else {
5578     *colb = iscolb;
5579   }
5580   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /*
5585     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5586     of the OFF-DIAGONAL portion of local A
5587 
5588     Collective on Mat
5589 
5590    Input Parameters:
5591 +    A,B - the matrices in mpiaij format
5592 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5593 
5594    Output Parameter:
5595 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5596 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5597 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5598 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5599 
5600     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5601      for this matrix. This is not desirable..
5602 
5603     Level: developer
5604 
5605 */
5606 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5607 {
5608   PetscErrorCode         ierr;
5609   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5610   Mat_SeqAIJ             *b_oth;
5611   VecScatter             ctx;
5612   MPI_Comm               comm;
5613   const PetscMPIInt      *rprocs,*sprocs;
5614   const PetscInt         *srow,*rstarts,*sstarts;
5615   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5616   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5617   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5618   MPI_Request            *rwaits = NULL,*swaits = NULL;
5619   MPI_Status             rstatus;
5620   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5621 
5622   PetscFunctionBegin;
5623   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5624   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5625 
5626   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5627     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5628   }
5629   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5630   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5631 
5632   if (size == 1) {
5633     startsj_s = NULL;
5634     bufa_ptr  = NULL;
5635     *B_oth    = NULL;
5636     PetscFunctionReturn(0);
5637   }
5638 
5639   ctx = a->Mvctx;
5640   tag = ((PetscObject)ctx)->tag;
5641 
5642   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5643   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5644   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5645   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5646   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5647   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5648   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5649 
5650   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5651   if (scall == MAT_INITIAL_MATRIX) {
5652     /* i-array */
5653     /*---------*/
5654     /*  post receives */
5655     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5656     for (i=0; i<nrecvs; i++) {
5657       rowlen = rvalues + rstarts[i]*rbs;
5658       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5659       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5660     }
5661 
5662     /* pack the outgoing message */
5663     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5664 
5665     sstartsj[0] = 0;
5666     rstartsj[0] = 0;
5667     len         = 0; /* total length of j or a array to be sent */
5668     if (nsends) {
5669       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5670       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5671     }
5672     for (i=0; i<nsends; i++) {
5673       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5674       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5675       for (j=0; j<nrows; j++) {
5676         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5677         for (l=0; l<sbs; l++) {
5678           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5679 
5680           rowlen[j*sbs+l] = ncols;
5681 
5682           len += ncols;
5683           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5684         }
5685         k++;
5686       }
5687       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5688 
5689       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5690     }
5691     /* recvs and sends of i-array are completed */
5692     i = nrecvs;
5693     while (i--) {
5694       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5695     }
5696     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5697     ierr = PetscFree(svalues);CHKERRQ(ierr);
5698 
5699     /* allocate buffers for sending j and a arrays */
5700     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5701     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5702 
5703     /* create i-array of B_oth */
5704     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5705 
5706     b_othi[0] = 0;
5707     len       = 0; /* total length of j or a array to be received */
5708     k         = 0;
5709     for (i=0; i<nrecvs; i++) {
5710       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5711       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5712       for (j=0; j<nrows; j++) {
5713         b_othi[k+1] = b_othi[k] + rowlen[j];
5714         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5715         k++;
5716       }
5717       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5718     }
5719     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5720 
5721     /* allocate space for j and a arrrays of B_oth */
5722     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5723     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5724 
5725     /* j-array */
5726     /*---------*/
5727     /*  post receives of j-array */
5728     for (i=0; i<nrecvs; i++) {
5729       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5730       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5731     }
5732 
5733     /* pack the outgoing message j-array */
5734     if (nsends) k = sstarts[0];
5735     for (i=0; i<nsends; i++) {
5736       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5737       bufJ  = bufj+sstartsj[i];
5738       for (j=0; j<nrows; j++) {
5739         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5740         for (ll=0; ll<sbs; ll++) {
5741           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5742           for (l=0; l<ncols; l++) {
5743             *bufJ++ = cols[l];
5744           }
5745           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5746         }
5747       }
5748       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5749     }
5750 
5751     /* recvs and sends of j-array are completed */
5752     i = nrecvs;
5753     while (i--) {
5754       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5755     }
5756     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5757   } else if (scall == MAT_REUSE_MATRIX) {
5758     sstartsj = *startsj_s;
5759     rstartsj = *startsj_r;
5760     bufa     = *bufa_ptr;
5761     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5762     b_otha   = b_oth->a;
5763   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5764 
5765   /* a-array */
5766   /*---------*/
5767   /*  post receives of a-array */
5768   for (i=0; i<nrecvs; i++) {
5769     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5770     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5771   }
5772 
5773   /* pack the outgoing message a-array */
5774   if (nsends) k = sstarts[0];
5775   for (i=0; i<nsends; i++) {
5776     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5777     bufA  = bufa+sstartsj[i];
5778     for (j=0; j<nrows; j++) {
5779       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5780       for (ll=0; ll<sbs; ll++) {
5781         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5782         for (l=0; l<ncols; l++) {
5783           *bufA++ = vals[l];
5784         }
5785         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5786       }
5787     }
5788     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5789   }
5790   /* recvs and sends of a-array are completed */
5791   i = nrecvs;
5792   while (i--) {
5793     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5794   }
5795   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5796   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5797 
5798   if (scall == MAT_INITIAL_MATRIX) {
5799     /* put together the new matrix */
5800     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5801 
5802     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5803     /* Since these are PETSc arrays, change flags to free them as necessary. */
5804     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5805     b_oth->free_a  = PETSC_TRUE;
5806     b_oth->free_ij = PETSC_TRUE;
5807     b_oth->nonew   = 0;
5808 
5809     ierr = PetscFree(bufj);CHKERRQ(ierr);
5810     if (!startsj_s || !bufa_ptr) {
5811       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5812       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5813     } else {
5814       *startsj_s = sstartsj;
5815       *startsj_r = rstartsj;
5816       *bufa_ptr  = bufa;
5817     }
5818   }
5819 
5820   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5821   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5822   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5823   PetscFunctionReturn(0);
5824 }
5825 
5826 /*@C
5827   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5828 
5829   Not Collective
5830 
5831   Input Parameters:
5832 . A - The matrix in mpiaij format
5833 
5834   Output Parameter:
5835 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5836 . colmap - A map from global column index to local index into lvec
5837 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5838 
5839   Level: developer
5840 
5841 @*/
5842 #if defined(PETSC_USE_CTABLE)
5843 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5844 #else
5845 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5846 #endif
5847 {
5848   Mat_MPIAIJ *a;
5849 
5850   PetscFunctionBegin;
5851   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5852   PetscValidPointer(lvec, 2);
5853   PetscValidPointer(colmap, 3);
5854   PetscValidPointer(multScatter, 4);
5855   a = (Mat_MPIAIJ*) A->data;
5856   if (lvec) *lvec = a->lvec;
5857   if (colmap) *colmap = a->colmap;
5858   if (multScatter) *multScatter = a->Mvctx;
5859   PetscFunctionReturn(0);
5860 }
5861 
5862 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5863 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5864 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5865 #if defined(PETSC_HAVE_MKL_SPARSE)
5866 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5867 #endif
5868 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5869 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5870 #if defined(PETSC_HAVE_ELEMENTAL)
5871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5872 #endif
5873 #if defined(PETSC_HAVE_SCALAPACK)
5874 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5875 #endif
5876 #if defined(PETSC_HAVE_HYPRE)
5877 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5878 #endif
5879 #if defined(PETSC_HAVE_CUDA)
5880 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5881 #endif
5882 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5883 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5884 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5885 
5886 /*
5887     Computes (B'*A')' since computing B*A directly is untenable
5888 
5889                n                       p                          p
5890         [             ]       [             ]         [                 ]
5891       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5892         [             ]       [             ]         [                 ]
5893 
5894 */
5895 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5896 {
5897   PetscErrorCode ierr;
5898   Mat            At,Bt,Ct;
5899 
5900   PetscFunctionBegin;
5901   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5902   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5903   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5904   ierr = MatDestroy(&At);CHKERRQ(ierr);
5905   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5906   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5907   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5908   PetscFunctionReturn(0);
5909 }
5910 
5911 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5912 {
5913   PetscErrorCode ierr;
5914   PetscBool      cisdense;
5915 
5916   PetscFunctionBegin;
5917   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5918   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5919   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5920   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5921   if (!cisdense) {
5922     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5923   }
5924   ierr = MatSetUp(C);CHKERRQ(ierr);
5925 
5926   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5927   PetscFunctionReturn(0);
5928 }
5929 
5930 /* ----------------------------------------------------------------*/
5931 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5932 {
5933   Mat_Product *product = C->product;
5934   Mat         A = product->A,B=product->B;
5935 
5936   PetscFunctionBegin;
5937   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5938     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5939 
5940   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5941   C->ops->productsymbolic = MatProductSymbolic_AB;
5942   PetscFunctionReturn(0);
5943 }
5944 
5945 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5946 {
5947   PetscErrorCode ierr;
5948   Mat_Product    *product = C->product;
5949 
5950   PetscFunctionBegin;
5951   if (product->type == MATPRODUCT_AB) {
5952     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5953   }
5954   PetscFunctionReturn(0);
5955 }
5956 /* ----------------------------------------------------------------*/
5957 
5958 /*MC
5959    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5960 
5961    Options Database Keys:
5962 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5963 
5964    Level: beginner
5965 
5966    Notes:
5967     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
5968     in this case the values associated with the rows and columns one passes in are set to zero
5969     in the matrix
5970 
5971     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
5972     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
5973 
5974 .seealso: MatCreateAIJ()
5975 M*/
5976 
5977 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5978 {
5979   Mat_MPIAIJ     *b;
5980   PetscErrorCode ierr;
5981   PetscMPIInt    size;
5982 
5983   PetscFunctionBegin;
5984   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5985 
5986   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5987   B->data       = (void*)b;
5988   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5989   B->assembled  = PETSC_FALSE;
5990   B->insertmode = NOT_SET_VALUES;
5991   b->size       = size;
5992 
5993   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5994 
5995   /* build cache for off array entries formed */
5996   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5997 
5998   b->donotstash  = PETSC_FALSE;
5999   b->colmap      = NULL;
6000   b->garray      = NULL;
6001   b->roworiented = PETSC_TRUE;
6002 
6003   /* stuff used for matrix vector multiply */
6004   b->lvec  = NULL;
6005   b->Mvctx = NULL;
6006 
6007   /* stuff for MatGetRow() */
6008   b->rowindices   = NULL;
6009   b->rowvalues    = NULL;
6010   b->getrowactive = PETSC_FALSE;
6011 
6012   /* flexible pointer used in CUSP/CUSPARSE classes */
6013   b->spptr = NULL;
6014 
6015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6025 #if defined(PETSC_HAVE_MKL_SPARSE)
6026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6027 #endif
6028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6031 #if defined(PETSC_HAVE_ELEMENTAL)
6032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6033 #endif
6034 #if defined(PETSC_HAVE_SCALAPACK)
6035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6036 #endif
6037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6039 #if defined(PETSC_HAVE_HYPRE)
6040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6042 #endif
6043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6045   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6046   PetscFunctionReturn(0);
6047 }
6048 
6049 /*@C
6050      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6051          and "off-diagonal" part of the matrix in CSR format.
6052 
6053    Collective
6054 
6055    Input Parameters:
6056 +  comm - MPI communicator
6057 .  m - number of local rows (Cannot be PETSC_DECIDE)
6058 .  n - This value should be the same as the local size used in creating the
6059        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6060        calculated if N is given) For square matrices n is almost always m.
6061 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6062 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6063 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6064 .   j - column indices
6065 .   a - matrix values
6066 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6067 .   oj - column indices
6068 -   oa - matrix values
6069 
6070    Output Parameter:
6071 .   mat - the matrix
6072 
6073    Level: advanced
6074 
6075    Notes:
6076        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6077        must free the arrays once the matrix has been destroyed and not before.
6078 
6079        The i and j indices are 0 based
6080 
6081        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6082 
6083        This sets local rows and cannot be used to set off-processor values.
6084 
6085        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6086        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6087        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6088        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6089        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6090        communication if it is known that only local entries will be set.
6091 
6092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6093           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6094 @*/
6095 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6096 {
6097   PetscErrorCode ierr;
6098   Mat_MPIAIJ     *maij;
6099 
6100   PetscFunctionBegin;
6101   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6102   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6103   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6104   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6105   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6106   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6107   maij = (Mat_MPIAIJ*) (*mat)->data;
6108 
6109   (*mat)->preallocated = PETSC_TRUE;
6110 
6111   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6112   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6113 
6114   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6115   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6116 
6117   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6118   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6119   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6120   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6121 
6122   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6123   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6124   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6125   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6126   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6127   PetscFunctionReturn(0);
6128 }
6129 
6130 /*
6131     Special version for direct calls from Fortran
6132 */
6133 #include <petsc/private/fortranimpl.h>
6134 
6135 /* Change these macros so can be used in void function */
6136 #undef CHKERRQ
6137 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6138 #undef SETERRQ2
6139 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6140 #undef SETERRQ3
6141 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6142 #undef SETERRQ
6143 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6144 
6145 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6146 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6147 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6148 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6149 #else
6150 #endif
6151 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6152 {
6153   Mat            mat  = *mmat;
6154   PetscInt       m    = *mm, n = *mn;
6155   InsertMode     addv = *maddv;
6156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6157   PetscScalar    value;
6158   PetscErrorCode ierr;
6159 
6160   MatCheckPreallocated(mat,1);
6161   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6162   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6163   {
6164     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6165     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6166     PetscBool roworiented = aij->roworiented;
6167 
6168     /* Some Variables required in the macro */
6169     Mat        A                    = aij->A;
6170     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6171     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6172     MatScalar  *aa                  = a->a;
6173     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6174     Mat        B                    = aij->B;
6175     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6176     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6177     MatScalar  *ba                  = b->a;
6178     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6179      * cannot use "#if defined" inside a macro. */
6180     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6181 
6182     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6183     PetscInt  nonew = a->nonew;
6184     MatScalar *ap1,*ap2;
6185 
6186     PetscFunctionBegin;
6187     for (i=0; i<m; i++) {
6188       if (im[i] < 0) continue;
6189       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6190       if (im[i] >= rstart && im[i] < rend) {
6191         row      = im[i] - rstart;
6192         lastcol1 = -1;
6193         rp1      = aj + ai[row];
6194         ap1      = aa + ai[row];
6195         rmax1    = aimax[row];
6196         nrow1    = ailen[row];
6197         low1     = 0;
6198         high1    = nrow1;
6199         lastcol2 = -1;
6200         rp2      = bj + bi[row];
6201         ap2      = ba + bi[row];
6202         rmax2    = bimax[row];
6203         nrow2    = bilen[row];
6204         low2     = 0;
6205         high2    = nrow2;
6206 
6207         for (j=0; j<n; j++) {
6208           if (roworiented) value = v[i*n+j];
6209           else value = v[i+j*m];
6210           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6211           if (in[j] >= cstart && in[j] < cend) {
6212             col = in[j] - cstart;
6213             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6214 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6215             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6216 #endif
6217           } else if (in[j] < 0) continue;
6218           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6219             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6220             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6221           } else {
6222             if (mat->was_assembled) {
6223               if (!aij->colmap) {
6224                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6225               }
6226 #if defined(PETSC_USE_CTABLE)
6227               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6228               col--;
6229 #else
6230               col = aij->colmap[in[j]] - 1;
6231 #endif
6232               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6233                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6234                 col  =  in[j];
6235                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6236                 B        = aij->B;
6237                 b        = (Mat_SeqAIJ*)B->data;
6238                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6239                 rp2      = bj + bi[row];
6240                 ap2      = ba + bi[row];
6241                 rmax2    = bimax[row];
6242                 nrow2    = bilen[row];
6243                 low2     = 0;
6244                 high2    = nrow2;
6245                 bm       = aij->B->rmap->n;
6246                 ba       = b->a;
6247                 inserted = PETSC_FALSE;
6248               }
6249             } else col = in[j];
6250             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6252             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6253 #endif
6254           }
6255         }
6256       } else if (!aij->donotstash) {
6257         if (roworiented) {
6258           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6259         } else {
6260           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6261         }
6262       }
6263     }
6264   }
6265   PetscFunctionReturnVoid();
6266 }
6267