xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f4f4c0e7a45c239f71a768ba5cb032f16c0e33b6)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
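   Example Usage (a minimal sketch; error checking is omitted and the sizes and preallocation counts are arbitrary;
   calling both preallocation routines is harmless since the one that does not match the matrix type is ignored):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,10,NULL);
   MatMPIAIJSetPreallocation(A,10,NULL,5,NULL);
.ve
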
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
23    automatically switches over to use inodes when enough consecutive rows share an identical nonzero pattern.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
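
    A minimal calling sketch (assuming every rank passes a valid sequential matrix gseq, with only the copy on
    rank 0 supplying the data, and mlocal chosen so that the local row counts sum to the global size):

       Mat Adist;
       MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);
       ...
       MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);

    With MAT_REUSE_MATRIX only the numerical values are moved over from rank 0; the parallel nonzero structure
    created by the first call is reused.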
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
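    /* turn the gathered local row counts into the global row ownership ranges (rowners[rank] = first row owned by rank) */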
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of off-diagonal entries in each row (and how many lie to the left of the diagonal block) */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of off-diagonal entries in each row (and how many lie to the left of the diagonal block) */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
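    /* restore the full row lengths (diagonal plus off-diagonal) so dlens[] can serve as the per-row column count in MatSetValues() below */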
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
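    /* step back to the last local row and copy its remaining off-diagonal entries (those to the right of the diagonal block) */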
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 has an order-N integer array) but is fast to access.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
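
/*
   A sketch (not compiled here) of how the colmap built above is consulted elsewhere in this file, for example
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().  Entries are stored shifted by one so that zero can mean
   "not present"; after the lookup, col is the local column in the off-diagonal block B, or negative when the
   global column gcol does not occur there:

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/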
447 
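/* The two macros below insert a single value into the diagonal (A) or off-diagonal (B) sequential block:
   they search the sorted row for col, add to or overwrite an existing entry, and otherwise insert a new
   nonzero, growing the row through MatSeqXAIJReallocateAIJ() when the nonew setting permits it. */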
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
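  /* v is expected to hold the entire row in increasing global column order:
     the off-diagonal entries to the left of the diagonal block, then the diagonal block, then the rest */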
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
584 #endif
585     if (im[i] >= rstart && im[i] < rend) {
586       row      = im[i] - rstart;
587       lastcol1 = -1;
588       rp1      = aj + ai[row];
589       ap1      = aa + ai[row];
590       rmax1    = aimax[row];
591       nrow1    = ailen[row];
592       low1     = 0;
593       high1    = nrow1;
594       lastcol2 = -1;
595       rp2      = bj + bi[row];
596       ap2      = ba + bi[row];
597       rmax2    = bimax[row];
598       nrow2    = bilen[row];
599       low2     = 0;
600       high2    = nrow2;
601 
602       for (j=0; j<n; j++) {
603         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
604         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
605         if (in[j] >= cstart && in[j] < cend) {
606           col   = in[j] - cstart;
607           nonew = a->nonew;
608           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
610           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
611 #endif
612         } else if (in[j] < 0) continue;
613 #if defined(PETSC_USE_DEBUG)
614         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
615 #endif
616         else {
617           if (mat->was_assembled) {
618             if (!aij->colmap) {
619               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
620             }
621 #if defined(PETSC_USE_CTABLE)
622             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
623             col--;
624 #else
625             col = aij->colmap[in[j]] - 1;
626 #endif
627             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
628               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
629               col  =  in[j];
630               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
631               B        = aij->B;
632               b        = (Mat_SeqAIJ*)B->data;
633               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
634               rp2      = bj + bi[row];
635               ap2      = ba + bi[row];
636               rmax2    = bimax[row];
637               nrow2    = bilen[row];
638               low2     = 0;
639               high2    = nrow2;
640               bm       = aij->B->rmap->n;
641               ba       = b->a;
642               inserted = PETSC_FALSE;
643             } else if (col < 0) {
644               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
645                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
646               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
647             }
648           } else col = in[j];
649           nonew = b->nonew;
650           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
652           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
653 #endif
654         }
655       }
656     } else {
657       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
658       if (!aij->donotstash) {
659         mat->assembled = PETSC_FALSE;
660         if (roworiented) {
661           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
662         } else {
663           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
664         }
665       }
666     }
667   }
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
673     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
674     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
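
    As an illustration (a hypothetical local block of 2 rows with cstart = 0 and cend = 3, so global columns
    0..2 belong to the diagonal part):

       row 0 has entries in global columns 0 and 4, row 1 in global columns 1, 2 and 5
       mat_i[] = {0, 2, 5}
       mat_j[] = {0, 4, 1, 2, 5}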
675 */
676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
677 {
678   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
679   Mat            A           = aij->A; /* diagonal part of the matrix */
680   Mat            B           = aij->B; /* offdiagonal part of the matrix */
681   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
682   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
683   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
684   PetscInt       *ailen      = a->ilen,*aj = a->j;
685   PetscInt       *bilen      = b->ilen,*bj = b->j;
686   PetscInt       am          = aij->A->rmap->n,j;
687   PetscInt       diag_so_far = 0,dnz;
688   PetscInt       offd_so_far = 0,onz;
689 
690   PetscFunctionBegin;
691   /* Iterate over all rows of the matrix */
692   for (j=0; j<am; j++) {
693     dnz = onz = 0;
694     /*  Iterate over all non-zero columns of the current row */
695     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
696       /* If column is in the diagonal */
697       if (mat_j[col] >= cstart && mat_j[col] < cend) {
698         aj[diag_so_far++] = mat_j[col] - cstart;
699         dnz++;
700       } else { /* off-diagonal entries */
701         bj[offd_so_far++] = mat_j[col];
702         onz++;
703       }
704     }
705     ailen[j] = dnz;
706     bilen[j] = onz;
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 /*
712     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
713     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
714     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
715     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
716     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
717 */
718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
719 {
720   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
721   Mat            A      = aij->A; /* diagonal part of the matrix */
722   Mat            B      = aij->B; /* offdiagonal part of the matrix */
723   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
724   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
725   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
727   PetscInt       *ailen = a->ilen,*aj = a->j;
728   PetscInt       *bilen = b->ilen,*bj = b->j;
729   PetscInt       am     = aij->A->rmap->n,j;
730   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
731   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
732   PetscScalar    *aa = a->a,*ba = b->a;
733 
734   PetscFunctionBegin;
735   /* Iterate over all rows of the matrix */
736   for (j=0; j<am; j++) {
737     dnz_row = onz_row = 0;
738     rowstart_offd = full_offd_i[j];
739     rowstart_diag = full_diag_i[j];
740     /*  Iterate over all non-zero columns of the current row */
741     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
742       /* If column is in the diagonal */
743       if (mat_j[col] >= cstart && mat_j[col] < cend) {
744         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
745         aa[rowstart_diag+dnz_row] = mat_a[col];
746         dnz_row++;
747       } else { /* off-diagonal entries */
748         bj[rowstart_offd+onz_row] = mat_j[col];
749         ba[rowstart_offd+onz_row] = mat_a[col];
750         onz_row++;
751       }
752     }
753     ailen[j] = dnz_row;
754     bilen[j] = onz_row;
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
760 {
761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
762   PetscErrorCode ierr;
763   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
764   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
765 
766   PetscFunctionBegin;
767   for (i=0; i<m; i++) {
768     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
769     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
770     if (idxm[i] >= rstart && idxm[i] < rend) {
771       row = idxm[i] - rstart;
772       for (j=0; j<n; j++) {
773         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
774         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
775         if (idxn[j] >= cstart && idxn[j] < cend) {
776           col  = idxn[j] - cstart;
777           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
778         } else {
779           if (!aij->colmap) {
780             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
781           }
782 #if defined(PETSC_USE_CTABLE)
783           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
784           col--;
785 #else
786           col = aij->colmap[idxn[j]] - 1;
787 #endif
788           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
789           else {
790             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
800 
801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   PetscErrorCode ierr;
805   PetscInt       nstash,reallocs;
806 
807   PetscFunctionBegin;
808   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
812   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
819   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
820   PetscErrorCode ierr;
821   PetscMPIInt    n;
822   PetscInt       i,j,rstart,ncols,flg;
823   PetscInt       *row,*col;
824   PetscBool      other_disassembled;
825   PetscScalar    *val;
826 
827   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
828 
829   PetscFunctionBegin;
830   if (!aij->donotstash && !mat->nooffprocentries) {
831     while (1) {
832       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
833       if (!flg) break;
834 
835       for (i=0; i<n; ) {
836         /* Now identify the consecutive vals belonging to the same row */
837         for (j=i,rstart=row[j]; j<n; j++) {
838           if (row[j] != rstart) break;
839         }
840         if (j < n) ncols = j-i;
841         else       ncols = n-i;
842         /* Now assemble all these values with a single function call */
843         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
844 
845         i = j;
846       }
847     }
848     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
849   }
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so we must
857      also disassemble ourselves, so that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,p = 0,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscSFNode       *rrows;
993   PetscSF           sf;
994   const PetscScalar *xx;
995   PetscScalar       *bb,*mask;
996   Vec               xmask,lmask;
997   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
998   const PetscInt    *aj, *ii,*ridx;
999   PetscScalar       *aa;
1000 
1001   PetscFunctionBegin;
1002   /* Create SF where leaves are input rows and roots are owned rows */
1003   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1004   for (r = 0; r < n; ++r) lrows[r] = -1;
1005   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1006   for (r = 0; r < N; ++r) {
1007     const PetscInt idx   = rows[r];
1008     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1009     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1010       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1011     }
1012     rrows[r].rank  = p;
1013     rrows[r].index = rows[r] - owners[p];
1014   }
1015   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1016   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1017   /* Collect flags for rows to be zeroed */
1018   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1019   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1021   /* Compress and put in row numbers */
1022   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1023   /* zero diagonal part of matrix */
1024   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1025   /* handle off diagonal part of matrix */
1026   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1027   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1028   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1029   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1030   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1031   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1034   if (x && b) { /* this code is buggy when the row and column layout don't match */
1035     PetscBool cong;
1036 
1037     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1038     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1039     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1040     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1042     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1043   }
1044   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1045   /* remove zeroed rows of off diagonal matrix */
1046   ii = aij->i;
1047   for (i=0; i<len; i++) {
1048     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1049   }
1050   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1051   if (aij->compressedrow.use) {
1052     m    = aij->compressedrow.nrows;
1053     ii   = aij->compressedrow.i;
1054     ridx = aij->compressedrow.rindex;
1055     for (i=0; i<m; i++) {
1056       n  = ii[i+1] - ii[i];
1057       aj = aij->j + ii[i];
1058       aa = aij->a + ii[i];
1059 
1060       for (j=0; j<n; j++) {
1061         if (PetscAbsScalar(mask[*aj])) {
1062           if (b) bb[*ridx] -= *aa*xx[*aj];
1063           *aa = 0.0;
1064         }
1065         aa++;
1066         aj++;
1067       }
1068       ridx++;
1069     }
1070   } else { /* do not use compressed row format */
1071     m = l->B->rmap->n;
1072     for (i=0; i<m; i++) {
1073       n  = ii[i+1] - ii[i];
1074       aj = aij->j + ii[i];
1075       aa = aij->a + ii[i];
1076       for (j=0; j<n; j++) {
1077         if (PetscAbsScalar(mask[*aj])) {
1078           if (b) bb[i] -= *aa*xx[*aj];
1079           *aa = 0.0;
1080         }
1081         aa++;
1082         aj++;
1083       }
1084     }
1085   }
1086   if (x && b) {
1087     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1088     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1089   }
1090   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1091   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1092   ierr = PetscFree(lrows);CHKERRQ(ierr);
1093 
1094   /* only change matrix nonzero state if pattern was allowed to be changed */
1095   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1096     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1097     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1098   }
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107   VecScatter     Mvctx = a->Mvctx;
1108 
1109   PetscFunctionBegin;
1110   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1111   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1112 
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1290 #if defined(PETSC_HAVE_ELEMENTAL)
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1292 #endif
1293 #if defined(PETSC_HAVE_HYPRE)
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1296 #endif
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1303 {
1304   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1305   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1306   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1307   PetscErrorCode ierr;
1308   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1309   int            fd;
1310   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1311   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1312   PetscScalar    *column_values;
1313   PetscInt       message_count,flowcontrolcount;
1314   FILE           *file;
1315 
1316   PetscFunctionBegin;
1317   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1319   nz   = A->nz + B->nz;
1320   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1321   if (!rank) {
1322     header[0] = MAT_FILE_CLASSID;
1323     header[1] = mat->rmap->N;
1324     header[2] = mat->cmap->N;
1325 
1326     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1328     /* get largest number of rows any processor has */
1329     rlen  = mat->rmap->n;
1330     range = mat->rmap->range;
1331     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1332   } else {
1333     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334     rlen = mat->rmap->n;
1335   }
1336 
1337   /* load up the local row counts */
1338   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1339   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1340 
1341   /* store the row lengths to the file */
1342   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1343   if (!rank) {
1344     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1345     for (i=1; i<size; i++) {
1346       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1347       rlen = range[i+1] - range[i];
1348       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1349       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1350     }
1351     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1352   } else {
1353     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1354     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1355     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1356   }
1357   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1358 
1359   /* load up the local column indices */
1360   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1361   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1363   cnt   = 0;
1364   for (i=0; i<mat->rmap->n; i++) {
1365     for (j=B->i[i]; j<B->i[i+1]; j++) {
1366       if ((col = garray[B->j[j]]) > cstart) break;
1367       column_indices[cnt++] = col;
1368     }
1369     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1370     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1371   }
1372   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1373 
1374   /* store the column indices to the file */
1375   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1376   if (!rank) {
1377     MPI_Status status;
1378     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     for (i=1; i<size; i++) {
1380       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1381       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1382       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1383       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1384       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1385     }
1386     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1387   } else {
1388     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1389     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1392   }
1393   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1394 
1395   /* load up the local column values */
1396   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1397   cnt  = 0;
1398   for (i=0; i<mat->rmap->n; i++) {
1399     for (j=B->i[i]; j<B->i[i+1]; j++) {
1400       if (garray[B->j[j]] > cstart) break;
1401       column_values[cnt++] = B->a[j];
1402     }
1403     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1404     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1405   }
1406   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1407 
1408   /* store the column values to the file */
1409   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1410   if (!rank) {
1411     MPI_Status status;
1412     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1413     for (i=1; i<size; i++) {
1414       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1415       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1416       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1417       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1418       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1419     }
1420     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1421   } else {
1422     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1423     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1424     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1426   }
1427   ierr = PetscFree(column_values);CHKERRQ(ierr);
1428 
1429   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1430   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1431   PetscFunctionReturn(0);
1432 }
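/*
   For reference, the layout written by MatView_MPIAIJ_Binary() above is

     header[4]      : {MAT_FILE_CLASSID, M, N, total number of nonzeros}
     row lengths    : one PetscInt per global row, in global row order
     column indices : the global column indices of every row concatenated, ascending within a
                      row (assuming the usual sorted AIJ storage)
     values         : the nonzero values in the same order as the column indices

   and "-matload_block_size <bs>" is appended to the viewer's .info file so MatLoad() can
   recover the block size.
*/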
1433 
1434 #include <petscdraw.h>
1435 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1436 {
1437   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1438   PetscErrorCode    ierr;
1439   PetscMPIInt       rank = aij->rank,size = aij->size;
1440   PetscBool         isdraw,iascii,isbinary;
1441   PetscViewer       sviewer;
1442   PetscViewerFormat format;
1443 
1444   PetscFunctionBegin;
1445   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1446   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1447   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1448   if (iascii) {
1449     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1450     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1451       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1452       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1453       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1454       for (i=0; i<(PetscInt)size; i++) {
1455         nmax = PetscMax(nmax,nz[i]);
1456         nmin = PetscMin(nmin,nz[i]);
1457         navg += nz[i];
1458       }
1459       ierr = PetscFree(nz);CHKERRQ(ierr);
1460       navg = navg/size;
1461       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1462       PetscFunctionReturn(0);
1463     }
1464     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1465     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1466       MatInfo   info;
1467       PetscBool inodes;
1468 
1469       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1470       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1471       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1473       if (!inodes) {
1474         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1475                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1476       } else {
1477         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1478                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1479       }
1480       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1482       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1483       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1484       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1485       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1486       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1487       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1488       PetscFunctionReturn(0);
1489     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1490       PetscInt inodecount,inodelimit,*inodes;
1491       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1492       if (inodes) {
1493         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1494       } else {
1495         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1496       }
1497       PetscFunctionReturn(0);
1498     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1499       PetscFunctionReturn(0);
1500     }
1501   } else if (isbinary) {
1502     if (size == 1) {
1503       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1504       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1505     } else {
1506       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1507     }
1508     PetscFunctionReturn(0);
1509   } else if (iascii && size == 1) {
1510     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1511     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1512     PetscFunctionReturn(0);
1513   } else if (isdraw) {
1514     PetscDraw draw;
1515     PetscBool isnull;
1516     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1517     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1518     if (isnull) PetscFunctionReturn(0);
1519   }
1520 
1521   { /* assemble the entire matrix onto first processor */
1522     Mat A = NULL, Av;
1523     IS  isrow,iscol;
1524 
1525     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1526     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1527     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1528     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1529 /*  The commented code uses MatCreateSubMatrices instead */
1530 /*
1531     Mat *AA, A = NULL, Av;
1532     IS  isrow,iscol;
1533 
1534     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1535     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1536     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1537     if (!rank) {
1538        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1539        A    = AA[0];
1540        Av   = AA[0];
1541     }
1542     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1543 */
1544     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1545     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1546     /*
1547        Everyone has to participate in this call to draw the matrix, since the graphics waits are
1548        synchronized across all processes that share the PetscDraw object
1549     */
1550     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1551     if (!rank) {
1552       if (((PetscObject)mat)->name) {
1553         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1554       }
1555       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1556     }
1557     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1558     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1559     ierr = MatDestroy(&A);CHKERRQ(ierr);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1565 {
1566   PetscErrorCode ierr;
1567   PetscBool      iascii,isdraw,issocket,isbinary;
1568 
1569   PetscFunctionBegin;
1570   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1571   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1572   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1573   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1574   if (iascii || isdraw || isbinary || issocket) {
1575     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1581 {
1582   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1583   PetscErrorCode ierr;
1584   Vec            bb1 = 0;
1585   PetscBool      hasop;
1586 
1587   PetscFunctionBegin;
1588   if (flag == SOR_APPLY_UPPER) {
1589     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1590     PetscFunctionReturn(0);
1591   }
1592 
1593   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1594     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1595   }
1596 
1597   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1598     if (flag & SOR_ZERO_INITIAL_GUESS) {
1599       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1600       its--;
1601     }
1602 
1603     while (its--) {
1604       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606 
1607       /* update rhs: bb1 = bb - B*x */
1608       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1609       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1610 
1611       /* local sweep */
1612       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1613     }
1614   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1615     if (flag & SOR_ZERO_INITIAL_GUESS) {
1616       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1617       its--;
1618     }
1619     while (its--) {
1620       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1621       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1622 
1623       /* update rhs: bb1 = bb - B*x */
1624       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1625       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1626 
1627       /* local sweep */
1628       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1629     }
1630   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1631     if (flag & SOR_ZERO_INITIAL_GUESS) {
1632       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1633       its--;
1634     }
1635     while (its--) {
1636       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1637       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1638 
1639       /* update rhs: bb1 = bb - B*x */
1640       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1641       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1642 
1643       /* local sweep */
1644       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1645     }
1646   } else if (flag & SOR_EISENSTAT) {
1647     Vec xx1;
1648 
1649     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1650     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1651 
1652     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1654     if (!mat->diag) {
1655       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1656       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1657     }
1658     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1659     if (hasop) {
1660       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1661     } else {
1662       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1663     }
1664     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1665 
1666     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1667 
1668     /* local sweep */
1669     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1670     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1671     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1672   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1673 
1674   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1675 
1676   matin->factorerrortype = mat->A->factorerrortype;
1677   PetscFunctionReturn(0);
1678 }
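/*
   A minimal usage sketch: only the local (processor-block) sweeps above are supported in
   parallel, so a typical call, assuming A is an assembled MATMPIAIJ matrix, b and x are
   conforming vectors, and ierr is a declared PetscErrorCode, is

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,1,1,x);CHKERRQ(ierr);   // omega = 1, fshift = 0, one outer and one local sweep
*/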
1679 
1680 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1681 {
1682   Mat            aA,aB,Aperm;
1683   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1684   PetscScalar    *aa,*ba;
1685   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1686   PetscSF        rowsf,sf;
1687   IS             parcolp = NULL;
1688   PetscBool      done;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1693   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1694   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1695   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1696 
1697   /* Invert row permutation to find out where my rows should go */
1698   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1699   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1700   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1701   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1702   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1703   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1704 
1705   /* Invert column permutation to find out where my columns should go */
1706   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1707   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1708   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1709   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1710   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1711   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1712   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1713 
1714   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1715   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1716   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1717 
1718   /* Find out where my gcols should go */
1719   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1720   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1721   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1722   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1723   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1724   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1725   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1726   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1727 
1728   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1729   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1730   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1731   for (i=0; i<m; i++) {
1732     PetscInt row = rdest[i],rowner;
1733     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1734     for (j=ai[i]; j<ai[i+1]; j++) {
1735       PetscInt cowner,col = cdest[aj[j]];
1736       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1737       if (rowner == cowner) dnnz[i]++;
1738       else onnz[i]++;
1739     }
1740     for (j=bi[i]; j<bi[i+1]; j++) {
1741       PetscInt cowner,col = gcdest[bj[j]];
1742       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1743       if (rowner == cowner) dnnz[i]++;
1744       else onnz[i]++;
1745     }
1746   }
1747   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1748   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1749   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1750   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1751   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1752 
1753   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1754   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1755   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1756   for (i=0; i<m; i++) {
1757     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1758     PetscInt j0,rowlen;
1759     rowlen = ai[i+1] - ai[i];
1760     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1761       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1762       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1763     }
1764     rowlen = bi[i+1] - bi[i];
1765     for (j0=j=0; j<rowlen; j0=j) {
1766       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1767       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1768     }
1769   }
1770   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1771   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1772   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1773   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1774   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1775   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1776   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1777   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1778   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1779   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1780   *B = Aperm;
1781   PetscFunctionReturn(0);
1782 }
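/*
   A minimal usage sketch, assuming A is an assembled MATMPIAIJ matrix and rowperm, colperm are
   index sets (one per process, covering the local rows and columns) describing the new ordering:

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);   // once the permuted matrix is no longer needed
*/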
1783 
1784 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1791   if (ghosts) *ghosts = aij->garray;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1796 {
1797   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1798   Mat            A    = mat->A,B = mat->B;
1799   PetscErrorCode ierr;
1800   PetscLogDouble isend[5],irecv[5];
1801 
1802   PetscFunctionBegin;
1803   info->block_size = 1.0;
1804   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1805 
1806   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1807   isend[3] = info->memory;  isend[4] = info->mallocs;
1808 
1809   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1810 
1811   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1812   isend[3] += info->memory;  isend[4] += info->mallocs;
1813   if (flag == MAT_LOCAL) {
1814     info->nz_used      = isend[0];
1815     info->nz_allocated = isend[1];
1816     info->nz_unneeded  = isend[2];
1817     info->memory       = isend[3];
1818     info->mallocs      = isend[4];
1819   } else if (flag == MAT_GLOBAL_MAX) {
1820     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1821 
1822     info->nz_used      = irecv[0];
1823     info->nz_allocated = irecv[1];
1824     info->nz_unneeded  = irecv[2];
1825     info->memory       = irecv[3];
1826     info->mallocs      = irecv[4];
1827   } else if (flag == MAT_GLOBAL_SUM) {
1828     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1829 
1830     info->nz_used      = irecv[0];
1831     info->nz_allocated = irecv[1];
1832     info->nz_unneeded  = irecv[2];
1833     info->memory       = irecv[3];
1834     info->mallocs      = irecv[4];
1835   }
1836   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1837   info->fill_ratio_needed = 0;
1838   info->factor_mallocs    = 0;
1839   PetscFunctionReturn(0);
1840 }
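/*
   A minimal usage sketch, assuming A is an assembled MATMPIAIJ matrix and ierr is a declared
   PetscErrorCode; MAT_LOCAL returns the numbers for this process only, while MAT_GLOBAL_MAX and
   MAT_GLOBAL_SUM reduce them over the communicator as in the routine above:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/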
1841 
1842 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1843 {
1844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   switch (op) {
1849   case MAT_NEW_NONZERO_LOCATIONS:
1850   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1851   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1852   case MAT_KEEP_NONZERO_PATTERN:
1853   case MAT_NEW_NONZERO_LOCATION_ERR:
1854   case MAT_USE_INODES:
1855   case MAT_IGNORE_ZERO_ENTRIES:
1856     MatCheckPreallocated(A,1);
1857     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1858     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1859     break;
1860   case MAT_ROW_ORIENTED:
1861     MatCheckPreallocated(A,1);
1862     a->roworiented = flg;
1863 
1864     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1865     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1866     break;
1867   case MAT_NEW_DIAGONALS:
1868   case MAT_SORTED_FULL:
1869     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1870     break;
1871   case MAT_IGNORE_OFF_PROC_ENTRIES:
1872     a->donotstash = flg;
1873     break;
1874   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1875   case MAT_SPD:
1876   case MAT_SYMMETRIC:
1877   case MAT_STRUCTURALLY_SYMMETRIC:
1878   case MAT_HERMITIAN:
1879   case MAT_SYMMETRY_ETERNAL:
1880     break;
1881   case MAT_SUBMAT_SINGLEIS:
1882     A->submat_singleis = flg;
1883     break;
1884   case MAT_STRUCTURE_ONLY:
1885     /* The option is handled directly by MatSetOption() */
1886     break;
1887   default:
1888     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1894 {
1895   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1896   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1897   PetscErrorCode ierr;
1898   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1899   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1900   PetscInt       *cmap,*idx_p;
1901 
1902   PetscFunctionBegin;
1903   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1904   mat->getrowactive = PETSC_TRUE;
1905 
1906   if (!mat->rowvalues && (idx || v)) {
1907     /*
1908         allocate enough space to hold information from the longest row.
1909     */
1910     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1911     PetscInt   max = 1,tmp;
1912     for (i=0; i<matin->rmap->n; i++) {
1913       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1914       if (max < tmp) max = tmp;
1915     }
1916     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1917   }
1918 
1919   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1920   lrow = row - rstart;
1921 
1922   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1923   if (!v)   {pvA = 0; pvB = 0;}
1924   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1925   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   nztot = nzA + nzB;
1928 
1929   cmap = mat->garray;
1930   if (v  || idx) {
1931     if (nztot) {
1932       /* Sort by increasing column numbers, assuming A and B already sorted */
1933       PetscInt imark = -1;
1934       if (v) {
1935         *v = v_p = mat->rowvalues;
1936         for (i=0; i<nzB; i++) {
1937           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1938           else break;
1939         }
1940         imark = i;
1941         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1942         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1943       }
1944       if (idx) {
1945         *idx = idx_p = mat->rowindices;
1946         if (imark > -1) {
1947           for (i=0; i<imark; i++) {
1948             idx_p[i] = cmap[cworkB[i]];
1949           }
1950         } else {
1951           for (i=0; i<nzB; i++) {
1952             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1953             else break;
1954           }
1955           imark = i;
1956         }
1957         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1958         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1959       }
1960     } else {
1961       if (idx) *idx = 0;
1962       if (v)   *v   = 0;
1963     }
1964   }
1965   *nz  = nztot;
1966   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1967   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1972 {
1973   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1974 
1975   PetscFunctionBegin;
1976   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1977   aij->getrowactive = PETSC_FALSE;
1978   PetscFunctionReturn(0);
1979 }
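/*
   A minimal usage sketch: MatGetRow()/MatRestoreRow() may only be called for locally owned rows,
   one row at a time.  Assuming A is an assembled MATMPIAIJ matrix and ierr is a declared
   PetscErrorCode:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... use the global column indices cols[] and values vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/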
1980 
1981 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1982 {
1983   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1984   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1985   PetscErrorCode ierr;
1986   PetscInt       i,j,cstart = mat->cmap->rstart;
1987   PetscReal      sum = 0.0;
1988   MatScalar      *v;
1989 
1990   PetscFunctionBegin;
1991   if (aij->size == 1) {
1992     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1993   } else {
1994     if (type == NORM_FROBENIUS) {
1995       v = amat->a;
1996       for (i=0; i<amat->nz; i++) {
1997         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1998       }
1999       v = bmat->a;
2000       for (i=0; i<bmat->nz; i++) {
2001         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2002       }
2003       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2004       *norm = PetscSqrtReal(*norm);
2005       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
2006     } else if (type == NORM_1) { /* max column norm */
2007       PetscReal *tmp,*tmp2;
2008       PetscInt  *jj,*garray = aij->garray;
2009       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
2010       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
2011       *norm = 0.0;
2012       v     = amat->a; jj = amat->j;
2013       for (j=0; j<amat->nz; j++) {
2014         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2015       }
2016       v = bmat->a; jj = bmat->j;
2017       for (j=0; j<bmat->nz; j++) {
2018         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2019       }
2020       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2021       for (j=0; j<mat->cmap->N; j++) {
2022         if (tmp2[j] > *norm) *norm = tmp2[j];
2023       }
2024       ierr = PetscFree(tmp);CHKERRQ(ierr);
2025       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2026       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2027     } else if (type == NORM_INFINITY) { /* max row norm */
2028       PetscReal ntemp = 0.0;
2029       for (j=0; j<aij->A->rmap->n; j++) {
2030         v   = amat->a + amat->i[j];
2031         sum = 0.0;
2032         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2033           sum += PetscAbsScalar(*v); v++;
2034         }
2035         v = bmat->a + bmat->i[j];
2036         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2037           sum += PetscAbsScalar(*v); v++;
2038         }
2039         if (sum > ntemp) ntemp = sum;
2040       }
2041       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2042       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2043     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2044   }
2045   PetscFunctionReturn(0);
2046 }
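/*
   For reference, with a_ij denoting the entries of the global matrix, the norms computed above are

     NORM_FROBENIUS : ||A||_F   = sqrt( sum_{i,j} |a_ij|^2 )
     NORM_1         : ||A||_1   = max_j sum_i |a_ij|   (largest column sum)
     NORM_INFINITY  : ||A||_inf = max_i sum_j |a_ij|   (largest row sum)

   The 2-norm (the largest singular value) is not computed here, hence the PETSC_ERR_SUP above.
*/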
2047 
2048 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2049 {
2050   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2051   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2052   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2053   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2054   PetscErrorCode  ierr;
2055   Mat             B,A_diag,*B_diag;
2056   const MatScalar *array;
2057 
2058   PetscFunctionBegin;
2059   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2060   ai = Aloc->i; aj = Aloc->j;
2061   bi = Bloc->i; bj = Bloc->j;
2062   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2063     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2064     PetscSFNode          *oloc;
2065     PETSC_UNUSED PetscSF sf;
2066 
2067     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2068     /* compute d_nnz for preallocation */
2069     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2070     for (i=0; i<ai[ma]; i++) {
2071       d_nnz[aj[i]]++;
2072     }
2073     /* compute local off-diagonal contributions */
2074     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2075     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2076     /* map those to global */
2077     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2078     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2079     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2080     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2081     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2082     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2083     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2084 
2085     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2086     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2087     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2088     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2089     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2090     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2091   } else {
2092     B    = *matout;
2093     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2094   }
2095 
2096   b           = (Mat_MPIAIJ*)B->data;
2097   A_diag      = a->A;
2098   B_diag      = &b->A;
2099   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2100   A_diag_ncol = A_diag->cmap->N;
2101   B_diag_ilen = sub_B_diag->ilen;
2102   B_diag_i    = sub_B_diag->i;
2103 
2104   /* Set ilen for diagonal of B */
2105   for (i=0; i<A_diag_ncol; i++) {
2106     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2107   }
2108 
2109   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2110      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2111   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2112 
2113   /* copy over the B part */
2114   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2115   array = Bloc->a;
2116   row   = A->rmap->rstart;
2117   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2118   cols_tmp = cols;
2119   for (i=0; i<mb; i++) {
2120     ncol = bi[i+1]-bi[i];
2121     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2122     row++;
2123     array += ncol; cols_tmp += ncol;
2124   }
2125   ierr = PetscFree(cols);CHKERRQ(ierr);
2126 
2127   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2128   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2129   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2130     *matout = B;
2131   } else {
2132     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2133   }
2134   PetscFunctionReturn(0);
2135 }
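/*
   A minimal usage sketch, assuming A is an assembled MATMPIAIJ matrix and ierr is a declared
   PetscErrorCode:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   // new matrix holding A^T
     ierr = MatDestroy(&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);    // replace A by A^T in place
*/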
2136 
2137 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2138 {
2139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2140   Mat            a    = aij->A,b = aij->B;
2141   PetscErrorCode ierr;
2142   PetscInt       s1,s2,s3;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2146   if (rr) {
2147     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2148     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2149     /* Overlap communication with computation. */
2150     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2151   }
2152   if (ll) {
2153     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2154     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2155     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2156   }
2157   /* scale  the diagonal block */
2158   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2159 
2160   if (rr) {
2161     /* Do a scatter end and then right scale the off-diagonal block */
2162     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2163     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2169 {
2170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2179 {
2180   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2181   Mat            a,b,c,d;
2182   PetscBool      flg;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   a = matA->A; b = matA->B;
2187   c = matB->A; d = matB->B;
2188 
2189   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2190   if (flg) {
2191     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2192   }
2193   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2201   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2202 
2203   PetscFunctionBegin;
2204   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2205   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2206     /* because of the column compression in the off-processor part of the matrix a->B,
2207        the number of columns in a->B and b->B may be different, hence we cannot call
2208        the MatCopy() directly on the two parts. If need be, we can provide a more
2209        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2210        then copying the submatrices */
2211     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2212   } else {
2213     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2214     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2215   }
2216   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
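/*
   A minimal usage sketch: the fast path above requires both matrices to share the same nonzero
   pattern, which is easiest to guarantee by duplicating first.  Assuming A is an assembled
   MATMPIAIJ matrix and ierr is a declared PetscErrorCode:

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/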
2219 
2220 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2221 {
2222   PetscErrorCode ierr;
2223 
2224   PetscFunctionBegin;
2225   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 /*
2230    Computes the number of nonzeros per row needed for preallocation when X and Y
2231    have different nonzero structure.
2232 */
2233 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2234 {
2235   PetscInt       i,j,k,nzx,nzy;
2236 
2237   PetscFunctionBegin;
2238   /* Set the number of nonzeros in the new matrix */
2239   for (i=0; i<m; i++) {
2240     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2241     nzx = xi[i+1] - xi[i];
2242     nzy = yi[i+1] - yi[i];
2243     nnz[i] = 0;
2244     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2245       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2246       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2247       nnz[i]++;
2248     }
2249     for (; k<nzy; k++) nnz[i]++;
2250   }
2251   PetscFunctionReturn(0);
2252 }
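/*
   Worked example for the counting loop above: if a row of X has global columns {0,3,7} and the
   same row of Y has global columns {3,5}, the merged pattern is {0,3,5,7}, so nnz for that row
   is 4; the shared column 3 is counted only once because the duplicate is skipped.
*/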
2253 
2254 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2255 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2256 {
2257   PetscErrorCode ierr;
2258   PetscInt       m = Y->rmap->N;
2259   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2260   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2268 {
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2271   PetscBLASInt   bnz,one=1;
2272   Mat_SeqAIJ     *x,*y;
2273 
2274   PetscFunctionBegin;
2275   if (str == SAME_NONZERO_PATTERN) {
2276     PetscScalar alpha = a;
2277     x    = (Mat_SeqAIJ*)xx->A->data;
2278     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2279     y    = (Mat_SeqAIJ*)yy->A->data;
2280     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2281     x    = (Mat_SeqAIJ*)xx->B->data;
2282     y    = (Mat_SeqAIJ*)yy->B->data;
2283     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2284     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2285     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2286     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so there the matrix
2287        on the GPU will be updated; this fast path skips assembly, so mark the CPU copy as the current one */
2288 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2289     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2290       Y->offloadmask = PETSC_OFFLOAD_CPU;
2291     }
2292 #endif
2293   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2294     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2295   } else {
2296     Mat      B;
2297     PetscInt *nnz_d,*nnz_o;
2298     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2299     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2300     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2301     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2302     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2303     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2304     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2305     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2306     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2307     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2308     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2309     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2310     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2311     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2312   }
2313   PetscFunctionReturn(0);
2314 }
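/*
   A minimal usage sketch for computing Y = Y + a*X, assuming X and Y are assembled MATMPIAIJ
   matrices and ierr is a declared PetscErrorCode.  SAME_NONZERO_PATTERN selects the BLAS axpy
   fast path above; DIFFERENT_NONZERO_PATTERN rebuilds Y with the merged pattern.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/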
2315 
2316 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2317 
2318 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2319 {
2320 #if defined(PETSC_USE_COMPLEX)
2321   PetscErrorCode ierr;
2322   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2323 
2324   PetscFunctionBegin;
2325   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2326   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2327 #else
2328   PetscFunctionBegin;
2329 #endif
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2334 {
2335   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2336   PetscErrorCode ierr;
2337 
2338   PetscFunctionBegin;
2339   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2340   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2345 {
2346   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2351   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2356 {
2357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2358   PetscErrorCode ierr;
2359   PetscInt       i,*idxb = 0;
2360   PetscScalar    *va,*vb;
2361   Vec            vtmp;
2362 
2363   PetscFunctionBegin;
2364   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2365   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2366   if (idx) {
2367     for (i=0; i<A->rmap->n; i++) {
2368       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2369     }
2370   }
2371 
2372   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2373   if (idx) {
2374     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2375   }
2376   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2377   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2378 
2379   for (i=0; i<A->rmap->n; i++) {
2380     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2381       va[i] = vb[i];
2382       if (idx) idx[i] = a->garray[idxb[i]];
2383     }
2384   }
2385 
2386   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2387   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2388   ierr = PetscFree(idxb);CHKERRQ(ierr);
2389   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2394 {
2395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2396   PetscErrorCode ierr;
2397   PetscInt       i,*idxb = 0;
2398   PetscScalar    *va,*vb;
2399   Vec            vtmp;
2400 
2401   PetscFunctionBegin;
2402   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2404   if (idx) {
2405     for (i=0; i<A->rmap->n; i++) {
2406       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2407     }
2408   }
2409 
2410   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2411   if (idx) {
2412     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2413   }
2414   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2415   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2416 
2417   for (i=0; i<A->rmap->n; i++) {
2418     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2419       va[i] = vb[i];
2420       if (idx) idx[i] = a->garray[idxb[i]];
2421     }
2422   }
2423 
2424   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2425   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2426   ierr = PetscFree(idxb);CHKERRQ(ierr);
2427   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2432 {
2433   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2434   PetscInt       n      = A->rmap->n;
2435   PetscInt       cstart = A->cmap->rstart;
2436   PetscInt       *cmap  = mat->garray;
2437   PetscInt       *diagIdx, *offdiagIdx;
2438   Vec            diagV, offdiagV;
2439   PetscScalar    *a, *diagA, *offdiagA;
2440   PetscInt       r;
2441   PetscErrorCode ierr;
2442 
2443   PetscFunctionBegin;
2444   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2447   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2450   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2451   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2452   for (r = 0; r < n; ++r) {
2453     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2454       a[r]   = diagA[r];
2455       idx[r] = cstart + diagIdx[r];
2456     } else {
2457       a[r]   = offdiagA[r];
2458       idx[r] = cmap[offdiagIdx[r]];
2459     }
2460   }
2461   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2464   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2465   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2466   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2471 {
2472   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2473   PetscInt       n      = A->rmap->n;
2474   PetscInt       cstart = A->cmap->rstart;
2475   PetscInt       *cmap  = mat->garray;
2476   PetscInt       *diagIdx, *offdiagIdx;
2477   Vec            diagV, offdiagV;
2478   PetscScalar    *a, *diagA, *offdiagA;
2479   PetscInt       r;
2480   PetscErrorCode ierr;
2481 
2482   PetscFunctionBegin;
2483   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2484   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2485   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2486   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2487   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2488   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2489   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2490   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2491   for (r = 0; r < n; ++r) {
2492     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2493       a[r]   = diagA[r];
2494       idx[r] = cstart + diagIdx[r];
2495     } else {
2496       a[r]   = offdiagA[r];
2497       idx[r] = cmap[offdiagIdx[r]];
2498     }
2499   }
2500   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2501   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2502   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2503   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2504   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2505   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2510 {
2511   PetscErrorCode ierr;
2512   Mat            *dummy;
2513 
2514   PetscFunctionBegin;
2515   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2516   *newmat = *dummy;
2517   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2522 {
2523   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2528   A->factorerrortype = a->A->factorerrortype;
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2533 {
2534   PetscErrorCode ierr;
2535   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2536 
2537   PetscFunctionBegin;
2538   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2539   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2540   if (x->assembled) {
2541     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2542   } else {
2543     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2544   }
2545   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2546   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2551 {
2552   PetscFunctionBegin;
2553   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2554   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 /*@
2559    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2560 
2561    Collective on Mat
2562 
2563    Input Parameters:
2564 +    A - the matrix
2565 -    sc - PETSC_TRUE indicates the scalable algorithm should be used (the default is the non-scalable algorithm)
2566 
2567    Level: advanced
2568 
2569 @*/
2570 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2571 {
2572   PetscErrorCode       ierr;
2573 
2574   PetscFunctionBegin;
2575   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
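/*
   Example (editor's sketch, not taken from the PETSc documentation): selecting the scalable
   overlap algorithm for a MATMPIAIJ matrix A (the name A is illustrative), either directly,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or through the options database key read by MatSetFromOptions_MPIAIJ() below,

     -mat_increase_overlap_scalable

   Subsequent calls to MatIncreaseOverlap() then dispatch to MatIncreaseOverlap_MPIAIJ_Scalable().
*/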
2578 
2579 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2580 {
2581   PetscErrorCode       ierr;
2582   PetscBool            sc = PETSC_FALSE,flg;
2583 
2584   PetscFunctionBegin;
2585   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2586   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2587   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2588   if (flg) {
2589     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2590   }
2591   ierr = PetscOptionsTail();CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2596 {
2597   PetscErrorCode ierr;
2598   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2599   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2600 
2601   PetscFunctionBegin;
2602   if (!Y->preallocated) {
2603     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2604   } else if (!aij->nz) {
2605     PetscInt nonew = aij->nonew;
2606     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2607     aij->nonew = nonew;
2608   }
2609   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2614 {
2615   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2616   PetscErrorCode ierr;
2617 
2618   PetscFunctionBegin;
2619   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2620   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2621   if (d) {
2622     PetscInt rstart;
2623     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2624     *d += rstart;
2625 
2626   }
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2631 {
2632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2633   PetscErrorCode ierr;
2634 
2635   PetscFunctionBegin;
2636   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 /* -------------------------------------------------------------------*/
2641 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2642                                        MatGetRow_MPIAIJ,
2643                                        MatRestoreRow_MPIAIJ,
2644                                        MatMult_MPIAIJ,
2645                                 /* 4*/ MatMultAdd_MPIAIJ,
2646                                        MatMultTranspose_MPIAIJ,
2647                                        MatMultTransposeAdd_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*10*/ 0,
2652                                        0,
2653                                        0,
2654                                        MatSOR_MPIAIJ,
2655                                        MatTranspose_MPIAIJ,
2656                                 /*15*/ MatGetInfo_MPIAIJ,
2657                                        MatEqual_MPIAIJ,
2658                                        MatGetDiagonal_MPIAIJ,
2659                                        MatDiagonalScale_MPIAIJ,
2660                                        MatNorm_MPIAIJ,
2661                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2662                                        MatAssemblyEnd_MPIAIJ,
2663                                        MatSetOption_MPIAIJ,
2664                                        MatZeroEntries_MPIAIJ,
2665                                 /*24*/ MatZeroRows_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*29*/ MatSetUp_MPIAIJ,
2671                                        0,
2672                                        0,
2673                                        MatGetDiagonalBlock_MPIAIJ,
2674                                        0,
2675                                 /*34*/ MatDuplicate_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*39*/ MatAXPY_MPIAIJ,
2681                                        MatCreateSubMatrices_MPIAIJ,
2682                                        MatIncreaseOverlap_MPIAIJ,
2683                                        MatGetValues_MPIAIJ,
2684                                        MatCopy_MPIAIJ,
2685                                 /*44*/ MatGetRowMax_MPIAIJ,
2686                                        MatScale_MPIAIJ,
2687                                        MatShift_MPIAIJ,
2688                                        MatDiagonalSet_MPIAIJ,
2689                                        MatZeroRowsColumns_MPIAIJ,
2690                                 /*49*/ MatSetRandom_MPIAIJ,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2696                                        0,
2697                                        MatSetUnfactored_MPIAIJ,
2698                                        MatPermute_MPIAIJ,
2699                                        0,
2700                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2701                                        MatDestroy_MPIAIJ,
2702                                        MatView_MPIAIJ,
2703                                        0,
2704                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2705                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2706                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2711                                        MatGetRowMinAbs_MPIAIJ,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                 /*75*/ MatFDColoringApply_AIJ,
2717                                        MatSetFromOptions_MPIAIJ,
2718                                        0,
2719                                        0,
2720                                        MatFindZeroDiagonals_MPIAIJ,
2721                                 /*80*/ 0,
2722                                        0,
2723                                        0,
2724                                 /*83*/ MatLoad_MPIAIJ,
2725                                        MatIsSymmetric_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2731                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2732                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2733                                        MatPtAP_MPIAIJ_MPIAIJ,
2734                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2735                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                        MatPinToCPU_MPIAIJ,
2740                                 /*99*/ 0,
2741                                        0,
2742                                        0,
2743                                        MatConjugate_MPIAIJ,
2744                                        0,
2745                                 /*104*/MatSetValuesRow_MPIAIJ,
2746                                        MatRealPart_MPIAIJ,
2747                                        MatImaginaryPart_MPIAIJ,
2748                                        0,
2749                                        0,
2750                                 /*109*/0,
2751                                        0,
2752                                        MatGetRowMin_MPIAIJ,
2753                                        0,
2754                                        MatMissingDiagonal_MPIAIJ,
2755                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2756                                        0,
2757                                        MatGetGhosts_MPIAIJ,
2758                                        0,
2759                                        0,
2760                                 /*119*/0,
2761                                        0,
2762                                        0,
2763                                        0,
2764                                        MatGetMultiProcBlock_MPIAIJ,
2765                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2766                                        MatGetColumnNorms_MPIAIJ,
2767                                        MatInvertBlockDiagonal_MPIAIJ,
2768                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2769                                        MatCreateSubMatricesMPI_MPIAIJ,
2770                                 /*129*/0,
2771                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2772                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2773                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2774                                        0,
2775                                 /*134*/0,
2776                                        0,
2777                                        MatRARt_MPIAIJ_MPIAIJ,
2778                                        0,
2779                                        0,
2780                                 /*139*/MatSetBlockSizes_MPIAIJ,
2781                                        0,
2782                                        0,
2783                                        MatFDColoringSetUp_MPIXAIJ,
2784                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2785                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2786 };
2787 
2788 /* ----------------------------------------------------------------------------------------*/
2789 
2790 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2791 {
2792   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2793   PetscErrorCode ierr;
2794 
2795   PetscFunctionBegin;
2796   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2797   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2802 {
2803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2808   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2813 {
2814   Mat_MPIAIJ     *b;
2815   PetscErrorCode ierr;
2816   PetscMPIInt    size;
2817 
2818   PetscFunctionBegin;
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   b = (Mat_MPIAIJ*)B->data;
2822 
2823 #if defined(PETSC_USE_CTABLE)
2824   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2825 #else
2826   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2827 #endif
2828   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2829   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2830   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2831 
2832   /* Because B may have been resized, we simply destroy it and create a new one each time */
2833   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2834   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2835   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2836   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2837   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2838   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2839   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2840 
2841   if (!B->preallocated) {
2842     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2843     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2844     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2845     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2846     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2847   }
2848 
2849   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2850   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2851   B->preallocated  = PETSC_TRUE;
2852   B->was_assembled = PETSC_FALSE;
2853   B->assembled     = PETSC_FALSE;
2854   PetscFunctionReturn(0);
2855 }
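/*
   Example (editor's sketch, not part of the library): a typical user-level preallocation that
   ends up in MatMPIAIJSetPreallocation_MPIAIJ() above.  All sizes and counts below are made up;
   d_nnz[]/o_nnz[] give per-local-row nonzero counts for the diagonal (A) and off-diagonal (B) blocks.

     Mat            C;
     PetscErrorCode ierr;
     PetscInt       d_nnz[3] = {3,2,3},o_nnz[3] = {1,0,2};   (one entry per local row)

     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(C,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/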
2856 
2857 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2858 {
2859   Mat_MPIAIJ     *b;
2860   PetscErrorCode ierr;
2861 
2862   PetscFunctionBegin;
2863   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2864   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2865   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2866   b = (Mat_MPIAIJ*)B->data;
2867 
2868 #if defined(PETSC_USE_CTABLE)
2869   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2870 #else
2871   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2872 #endif
2873   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2874   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2875   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2876 
2877   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2878   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2879   B->preallocated  = PETSC_TRUE;
2880   B->was_assembled = PETSC_FALSE;
2881   B->assembled = PETSC_FALSE;
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2886 {
2887   Mat            mat;
2888   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2889   PetscErrorCode ierr;
2890 
2891   PetscFunctionBegin;
2892   *newmat = 0;
2893   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2894   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2895   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2896   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2897   a       = (Mat_MPIAIJ*)mat->data;
2898 
2899   mat->factortype   = matin->factortype;
2900   mat->assembled    = PETSC_TRUE;
2901   mat->insertmode   = NOT_SET_VALUES;
2902   mat->preallocated = PETSC_TRUE;
2903 
2904   a->size         = oldmat->size;
2905   a->rank         = oldmat->rank;
2906   a->donotstash   = oldmat->donotstash;
2907   a->roworiented  = oldmat->roworiented;
2908   a->rowindices   = 0;
2909   a->rowvalues    = 0;
2910   a->getrowactive = PETSC_FALSE;
2911 
2912   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2913   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2914 
2915   if (oldmat->colmap) {
2916 #if defined(PETSC_USE_CTABLE)
2917     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2918 #else
2919     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2920     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2921     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2922 #endif
2923   } else a->colmap = 0;
2924   if (oldmat->garray) {
2925     PetscInt len;
2926     len  = oldmat->B->cmap->n;
2927     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2928     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2929     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2930   } else a->garray = 0;
2931 
2932   /* It may happen that MatDuplicate is called with a non-assembled matrix;
2933      in fact, MatDuplicate only requires the matrix to be preallocated.
2934      This can happen, for example, inside a DMCreateMatrix_Shell */
2935   if (oldmat->lvec) {
2936     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2937     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2938   }
2939   if (oldmat->Mvctx) {
2940     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2941     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2942   }
2943   if (oldmat->Mvctx_mpi1) {
2944     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2945     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2946   }
2947 
2948   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2949   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2950   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2951   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2952   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2953   *newmat = mat;
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2958 {
2959   PetscBool      isbinary, ishdf5;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2964   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2965   /* force binary viewer to load .info file if it has not yet done so */
2966   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2967   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2968   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2969   if (isbinary) {
2970     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2971   } else if (ishdf5) {
2972 #if defined(PETSC_HAVE_HDF5)
2973     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2974 #else
2975     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2976 #endif
2977   } else {
2978     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2979   }
2980   PetscFunctionReturn(0);
2981 }
2982 
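/*
   On-disk layout expected by the binary loader below (an editor's summary of what the
   PetscBinaryRead() calls in MatLoad_MPIAIJ_Binary() consume, not an independent specification):

     PetscInt    header[4]       MAT_FILE_CLASSID, M (rows), N (cols), nz (total nonzeros; < 0 means a special format)
     PetscInt    rowlengths[M]   number of nonzeros in each row
     PetscInt    columns[nz]     column indices, listed row by row
     PetscScalar values[nz]      numerical values, listed row by row
*/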
2983 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2984 {
2985   PetscScalar    *vals,*svals;
2986   MPI_Comm       comm;
2987   PetscErrorCode ierr;
2988   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2989   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2990   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2991   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2992   PetscInt       cend,cstart,n,*rowners;
2993   int            fd;
2994   PetscInt       bs = newMat->rmap->bs;
2995 
2996   PetscFunctionBegin;
2997   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2998   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2999   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3000   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3001   if (!rank) {
3002     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3003     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3003     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3004     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
3005   }
3006 
3007   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
3008   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3009   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3010   if (bs < 0) bs = 1;
3011 
3012   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3013   M    = header[1]; N = header[2];
3014 
3015   /* If global sizes are set, check if they are consistent with that given in the file */
3016   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Input matrix has (%D) rows and matrix in file has (%D)",newMat->rmap->N,M);
3017   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Input matrix has (%D) cols and matrix in file has (%D)",newMat->cmap->N,N);
3018 
3019   /* determine ownership of all (block) rows */
3020   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
3021   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3022   else m = newMat->rmap->n; /* Set by user */
3023 
3024   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3025   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3026 
3027   /* First process needs enough room for process with most rows */
3028   if (!rank) {
3029     mmax = rowners[1];
3030     for (i=2; i<=size; i++) {
3031       mmax = PetscMax(mmax, rowners[i]);
3032     }
3033   } else mmax = -1;             /* unused, but compilers complain */
3034 
3035   rowners[0] = 0;
3036   for (i=2; i<=size; i++) {
3037     rowners[i] += rowners[i-1];
3038   }
3039   rstart = rowners[rank];
3040   rend   = rowners[rank+1];
3041 
3042   /* distribute row lengths to all processors */
3043   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3044   if (!rank) {
3045     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3046     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3047     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3048     for (j=0; j<m; j++) {
3049       procsnz[0] += ourlens[j];
3050     }
3051     for (i=1; i<size; i++) {
3052       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3053       /* calculate the number of nonzeros on each processor */
3054       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3055         procsnz[i] += rowlengths[j];
3056       }
3057       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3058     }
3059     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3060   } else {
3061     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3062   }
3063 
3064   if (!rank) {
3065     /* determine max buffer needed and allocate it */
3066     maxnz = 0;
3067     for (i=0; i<size; i++) {
3068       maxnz = PetscMax(maxnz,procsnz[i]);
3069     }
3070     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3071 
3072     /* read in my part of the matrix column indices  */
3073     nz   = procsnz[0];
3074     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3075     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3076 
3077     /* read in everyone else's and ship them off */
3078     for (i=1; i<size; i++) {
3079       nz   = procsnz[i];
3080       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3081       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3082     }
3083     ierr = PetscFree(cols);CHKERRQ(ierr);
3084   } else {
3085     /* determine buffer space needed for message */
3086     nz = 0;
3087     for (i=0; i<m; i++) {
3088       nz += ourlens[i];
3089     }
3090     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3091 
3092     /* receive message of column indices */
3093     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3094   }
3095 
3096   /* determine column ownership if matrix is not square */
3097   if (N != M) {
3098     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3099     else n = newMat->cmap->n;
3100     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3101     cstart = cend - n;
3102   } else {
3103     cstart = rstart;
3104     cend   = rend;
3105     n      = cend - cstart;
3106   }
3107 
3108   /* loop over local rows, determining number of off-diagonal entries */
3109   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3110   jj   = 0;
3111   for (i=0; i<m; i++) {
3112     for (j=0; j<ourlens[i]; j++) {
3113       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3114       jj++;
3115     }
3116   }
3117 
3118   for (i=0; i<m; i++) {
3119     ourlens[i] -= offlens[i];
3120   }
3121   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3122 
3123   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3124 
3125   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3126 
3127   for (i=0; i<m; i++) {
3128     ourlens[i] += offlens[i];
3129   }
3130 
3131   if (!rank) {
3132     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3133 
3134     /* read in my part of the matrix numerical values  */
3135     nz   = procsnz[0];
3136     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3137 
3138     /* insert into matrix */
3139     jj      = rstart;
3140     smycols = mycols;
3141     svals   = vals;
3142     for (i=0; i<m; i++) {
3143       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3144       smycols += ourlens[i];
3145       svals   += ourlens[i];
3146       jj++;
3147     }
3148 
3149     /* read in other processors and ship out */
3150     for (i=1; i<size; i++) {
3151       nz   = procsnz[i];
3152       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3153       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3154     }
3155     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3156   } else {
3157     /* receive numeric values */
3158     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3159 
3160     /* receive message of values */
3161     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3162 
3163     /* insert into matrix */
3164     jj      = rstart;
3165     smycols = mycols;
3166     svals   = vals;
3167     for (i=0; i<m; i++) {
3168       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3169       smycols += ourlens[i];
3170       svals   += ourlens[i];
3171       jj++;
3172     }
3173   }
3174   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3175   ierr = PetscFree(vals);CHKERRQ(ierr);
3176   ierr = PetscFree(mycols);CHKERRQ(ierr);
3177   ierr = PetscFree(rowners);CHKERRQ(ierr);
3178   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3179   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3180   PetscFunctionReturn(0);
3181 }
3182 
3183 /* Not scalable because of ISAllGather() unless getting all columns. */
3184 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3185 {
3186   PetscErrorCode ierr;
3187   IS             iscol_local;
3188   PetscBool      isstride;
3189   PetscMPIInt    lisstride=0,gisstride;
3190 
3191   PetscFunctionBegin;
3192   /* check if we are grabbing all columns */
3193   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3194 
3195   if (isstride) {
3196     PetscInt  start,len,mstart,mlen;
3197     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3198     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3199     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3200     if (mstart == start && mlen-mstart == len) lisstride = 1;
3201   }
3202 
3203   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3204   if (gisstride) {
3205     PetscInt N;
3206     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3207     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3208     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3209     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3210   } else {
3211     PetscInt cbs;
3212     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3213     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3214     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3215   }
3216 
3217   *isseq = iscol_local;
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 /*
3222  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid calling ISAllGather() and building an iscol_local
3223  of global size (see MatCreateSubMatrix_MPIAIJ_nonscalable())
3224 
3225  Input Parameters:
3226    mat - matrix
3227    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3228            i.e., mat->rstart <= isrow[i] < mat->rend
3229    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3230            i.e., mat->cstart <= iscol[i] < mat->cend
3231  Output Parameters:
3232    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3233    iscol_o - sequential column index set for retrieving mat->B
3234    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3235  */
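/*
   Worked illustration (editor's sketch with made-up data): two processes, mat has 8 global
   columns, process 0 owns columns 0-3 and process 1 owns columns 4-7.  Suppose iscol selects
   global columns {1,3} on process 0 and {5,6} on process 1, and on process 0 the off-diagonal
   block mat->B has garray = {4,5,7}.  Then on process 0 the outputs are

     iscol_d = {1,3}   local column indices into mat->A
     iscol_o = {1}     local column index into mat->B (B column 1 is global column 5, which is selected)
     garray  = {2}     global column 5 is entry 2 of the concatenated iscol = {1,3,5,6}
*/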
3236 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3237 {
3238   PetscErrorCode ierr;
3239   Vec            x,cmap;
3240   const PetscInt *is_idx;
3241   PetscScalar    *xarray,*cmaparray;
3242   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3243   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3244   Mat            B=a->B;
3245   Vec            lvec=a->lvec,lcmap;
3246   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3247   MPI_Comm       comm;
3248   VecScatter     Mvctx=a->Mvctx;
3249 
3250   PetscFunctionBegin;
3251   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3252   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3253 
3254   /* (1) iscol selects a subset of the columns of mat; build a full column vector x holding the global column index in selected entries and -1 elsewhere */
3255   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3256   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3257   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3258   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3259 
3260   /* Get start indices */
3261   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3262   isstart -= ncols;
3263   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3264 
3265   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3266   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3267   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3268   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3269   for (i=0; i<ncols; i++) {
3270     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3271     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3272     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3273   }
3274   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3275   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3276   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3277 
3278   /* Get iscol_d */
3279   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3280   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3281   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3282 
3283   /* Get isrow_d */
3284   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3285   rstart = mat->rmap->rstart;
3286   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3287   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3288   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3289   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3290 
3291   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3292   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3293   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3294 
3295   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3296   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3297   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3298 
3299   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3300 
3301   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3302   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3303 
3304   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3305   /* off-process column indices */
3306   count = 0;
3307   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3308   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3309 
3310   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3311   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3312   for (i=0; i<Bn; i++) {
3313     if (PetscRealPart(xarray[i]) > -1.0) {
3314       idx[count]     = i;                   /* local column index in off-diagonal part B */
3315       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3316       count++;
3317     }
3318   }
3319   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3320   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3321 
3322   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3323   /* cannot ensure iscol_o has same blocksize as iscol! */
3324 
3325   ierr = PetscFree(idx);CHKERRQ(ierr);
3326   *garray = cmap1;
3327 
3328   ierr = VecDestroy(&x);CHKERRQ(ierr);
3329   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3330   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3335 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3336 {
3337   PetscErrorCode ierr;
3338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3339   Mat            M = NULL;
3340   MPI_Comm       comm;
3341   IS             iscol_d,isrow_d,iscol_o;
3342   Mat            Asub = NULL,Bsub = NULL;
3343   PetscInt       n;
3344 
3345   PetscFunctionBegin;
3346   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3347 
3348   if (call == MAT_REUSE_MATRIX) {
3349     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3350     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3351     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3352 
3353     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3354     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3355 
3356     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3357     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3358 
3359     /* Update diagonal and off-diagonal portions of submat */
3360     asub = (Mat_MPIAIJ*)(*submat)->data;
3361     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3362     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3363     if (n) {
3364       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3365     }
3366     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368 
3369   } else { /* call == MAT_INITIAL_MATRIX */
3370     const PetscInt *garray;
3371     PetscInt        BsubN;
3372 
3373     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3374     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3375 
3376     /* Create local submatrices Asub and Bsub */
3377     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3378     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3379 
3380     /* Create submatrix M */
3381     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3382 
3383     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3384     asub = (Mat_MPIAIJ*)M->data;
3385 
3386     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3387     n = asub->B->cmap->N;
3388     if (BsubN > n) {
3389       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3390       const PetscInt *idx;
3391       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3392       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3393 
3394       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3395       j = 0;
3396       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3397       for (i=0; i<n; i++) {
3398         if (j >= BsubN) break;
3399         while (subgarray[i] > garray[j]) j++;
3400 
3401         if (subgarray[i] == garray[j]) {
3402           idx_new[i] = idx[j++];
3403         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3404       }
3405       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3406 
3407       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3408       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3409 
3410     } else if (BsubN < n) {
3411       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of the submatrix's off-diagonal block (%D)",BsubN,asub->B->cmap->N);
3412     }
3413 
3414     ierr = PetscFree(garray);CHKERRQ(ierr);
3415     *submat = M;
3416 
3417     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3418     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3419     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3420 
3421     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3422     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3423 
3424     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3425     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3426   }
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3431 {
3432   PetscErrorCode ierr;
3433   IS             iscol_local=NULL,isrow_d;
3434   PetscInt       csize;
3435   PetscInt       n,i,j,start,end;
3436   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3437   MPI_Comm       comm;
3438 
3439   PetscFunctionBegin;
3440   /* If isrow has the same processor distribution as mat,
3441      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table whose size is the global size of iscol */
3442   if (call == MAT_REUSE_MATRIX) {
3443     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3444     if (isrow_d) {
3445       sameRowDist  = PETSC_TRUE;
3446       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3447     } else {
3448       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3449       if (iscol_local) {
3450         sameRowDist  = PETSC_TRUE;
3451         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3452       }
3453     }
3454   } else {
3455     /* Check if isrow has same processor distribution as mat */
3456     sameDist[0] = PETSC_FALSE;
3457     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3458     if (!n) {
3459       sameDist[0] = PETSC_TRUE;
3460     } else {
3461       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3462       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3463       if (i >= start && j < end) {
3464         sameDist[0] = PETSC_TRUE;
3465       }
3466     }
3467 
3468     /* Check if iscol has same processor distribution as mat */
3469     sameDist[1] = PETSC_FALSE;
3470     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3471     if (!n) {
3472       sameDist[1] = PETSC_TRUE;
3473     } else {
3474       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3475       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3476       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3477     }
3478 
3479     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3480     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3481     sameRowDist = tsameDist[0];
3482   }
3483 
3484   if (sameRowDist) {
3485     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3486       /* isrow and iscol have same processor distribution as mat */
3487       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3488       PetscFunctionReturn(0);
3489     } else { /* sameRowDist */
3490       /* isrow has same processor distribution as mat */
3491       if (call == MAT_INITIAL_MATRIX) {
3492         PetscBool sorted;
3493         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3494         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3495         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3496         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3497 
3498         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3499         if (sorted) {
3500           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3501           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3502           PetscFunctionReturn(0);
3503         }
3504       } else { /* call == MAT_REUSE_MATRIX */
3505         IS    iscol_sub;
3506         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3507         if (iscol_sub) {
3508           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3509           PetscFunctionReturn(0);
3510         }
3511       }
3512     }
3513   }
3514 
3515   /* General case: iscol -> iscol_local which has global size of iscol */
3516   if (call == MAT_REUSE_MATRIX) {
3517     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3518     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3519   } else {
3520     if (!iscol_local) {
3521       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3522     }
3523   }
3524 
3525   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3526   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3527 
3528   if (call == MAT_INITIAL_MATRIX) {
3529     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3530     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3531   }
3532   PetscFunctionReturn(0);
3533 }
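/*
   Example (editor's sketch, not part of the library): the user-level call that reaches
   MatCreateSubMatrix_MPIAIJ() above.  Here isrow and iscol are assumed to contain only
   locally owned rows/columns on every process, so the SameRowColDist branch is taken;
   the MAT_REUSE_MATRIX call refills the previously created submatrix S.

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/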
3534 
3535 /*@C
3536      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3537          and "off-diagonal" parts of the matrix in CSR format.
3538 
3539    Collective
3540 
3541    Input Parameters:
3542 +  comm - MPI communicator
3543 .  A - "diagonal" portion of matrix
3544 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3545 -  garray - global index of B columns
3546 
3547    Output Parameter:
3548 .   mat - the matrix, with input A as its local diagonal matrix
3549    Level: advanced
3550 
3551    Notes:
3552        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3553        A becomes part of the output mat and B is destroyed by this routine; the caller must not use A or B afterwards.
3554 
3555 .seealso: MatCreateMPIAIJWithSplitArrays()
3556 @*/
3557 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3558 {
3559   PetscErrorCode ierr;
3560   Mat_MPIAIJ     *maij;
3561   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3562   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3563   PetscScalar    *oa=b->a;
3564   Mat            Bnew;
3565   PetscInt       m,n,N;
3566 
3567   PetscFunctionBegin;
3568   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3569   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3570   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3571   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3572   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3573   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3574 
3575   /* Get global columns of mat */
3576   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3577 
3578   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3579   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3580   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3581   maij = (Mat_MPIAIJ*)(*mat)->data;
3582 
3583   (*mat)->preallocated = PETSC_TRUE;
3584 
3585   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3586   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3587 
3588   /* Set A as diagonal portion of *mat */
3589   maij->A = A;
3590 
3591   nz = oi[m];
3592   for (i=0; i<nz; i++) {
3593     col   = oj[i];
3594     oj[i] = garray[col];
3595   }
3596 
3597   /* Set Bnew as off-diagonal portion of *mat */
3598   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3599   bnew        = (Mat_SeqAIJ*)Bnew->data;
3600   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3601   maij->B     = Bnew;
3602 
3603   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3604 
3605   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3606   b->free_a       = PETSC_FALSE;
3607   b->free_ij      = PETSC_FALSE;
3608   ierr = MatDestroy(&B);CHKERRQ(ierr);
3609 
3610   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3611   bnew->free_a       = PETSC_TRUE;
3612   bnew->free_ij      = PETSC_TRUE;
3613 
3614   /* condense columns of maij->B */
3615   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3616   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3619   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3620   PetscFunctionReturn(0);
3621 }
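/*
   Example (editor's sketch): the call made by MatCreateSubMatrix_MPIAIJ_SameRowColDist() above,
   where Asub and Bsub are sequential AIJ matrices holding the local diagonal and off-diagonal
   blocks and garray gives the global column index (in the new matrix) of each column of Bsub.
   After the call Asub and Bsub belong to M and must not be used by the caller.

     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
*/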
3622 
3623 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3624 
3625 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3626 {
3627   PetscErrorCode ierr;
3628   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3629   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3630   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3631   Mat            M,Msub,B=a->B;
3632   MatScalar      *aa;
3633   Mat_SeqAIJ     *aij;
3634   PetscInt       *garray = a->garray,*colsub,Ncols;
3635   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3636   IS             iscol_sub,iscmap;
3637   const PetscInt *is_idx,*cmap;
3638   PetscBool      allcolumns=PETSC_FALSE;
3639   MPI_Comm       comm;
3640 
3641   PetscFunctionBegin;
3642   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3643 
3644   if (call == MAT_REUSE_MATRIX) {
3645     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3646     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3647     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3648 
3649     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3650     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3651 
3652     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3653     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3654 
3655     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3656 
3657   } else { /* call == MAT_INITIAL_MATRIX */
3658     PetscBool flg;
3659 
3660     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3661     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3662 
3663     /* (1) iscol -> nonscalable iscol_local */
3664     /* Check for special case: each processor gets entire matrix columns */
3665     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3666     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3667     if (allcolumns) {
3668       iscol_sub = iscol_local;
3669       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3670       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3671 
3672     } else {
3673       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3674       PetscInt *idx,*cmap1,k;
3675       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3676       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3677       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3678       count = 0;
3679       k     = 0;
3680       for (i=0; i<Ncols; i++) {
3681         j = is_idx[i];
3682         if (j >= cstart && j < cend) {
3683           /* diagonal part of mat */
3684           idx[count]     = j;
3685           cmap1[count++] = i; /* column index in submat */
3686         } else if (Bn) {
3687           /* off-diagonal part of mat */
3688           if (j == garray[k]) {
3689             idx[count]     = j;
3690             cmap1[count++] = i;  /* column index in submat */
3691           } else if (j > garray[k]) {
3692             while (j > garray[k] && k < Bn-1) k++;
3693             if (j == garray[k]) {
3694               idx[count]     = j;
3695               cmap1[count++] = i; /* column index in submat */
3696             }
3697           }
3698         }
3699       }
3700       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3701 
3702       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3703       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3704       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3705 
3706       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3707     }
3708 
3709     /* (3) Create sequential Msub */
3710     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3711   }
3712 
3713   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3714   aij  = (Mat_SeqAIJ*)(Msub)->data;
3715   ii   = aij->i;
3716   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3717 
3718   /*
3719       m - number of local rows
3720       Ncols - number of columns (same on all processors)
3721       rstart - first row in new global matrix generated
3722   */
3723   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3724 
3725   if (call == MAT_INITIAL_MATRIX) {
3726     /* (4) Create parallel newmat */
3727     PetscMPIInt    rank,size;
3728     PetscInt       csize;
3729 
3730     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3731     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3732 
3733     /*
3734         Determine the number of non-zeros in the diagonal and off-diagonal
3735         portions of the matrix in order to do correct preallocation
3736     */
3737 
3738     /* first get start and end of "diagonal" columns */
3739     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3740     if (csize == PETSC_DECIDE) {
3741       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3742       if (mglobal == Ncols) { /* square matrix */
3743         nlocal = m;
3744       } else {
3745         nlocal = Ncols/size + ((Ncols % size) > rank);
3746       }
3747     } else {
3748       nlocal = csize;
3749     }
3750     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3751     rstart = rend - nlocal;
3752     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3753 
3754     /* next, compute all the lengths */
3755     jj    = aij->j;
3756     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3757     olens = dlens + m;
3758     for (i=0; i<m; i++) {
3759       jend = ii[i+1] - ii[i];
3760       olen = 0;
3761       dlen = 0;
3762       for (j=0; j<jend; j++) {
3763         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3764         else dlen++;
3765         jj++;
3766       }
3767       olens[i] = olen;
3768       dlens[i] = dlen;
3769     }
3770 
3771     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3772     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3773 
3774     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3775     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3776     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3777     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3778     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3779     ierr = PetscFree(dlens);CHKERRQ(ierr);
3780 
3781   } else { /* call == MAT_REUSE_MATRIX */
3782     M    = *newmat;
3783     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3784     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3785     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3786     /*
3787          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3788        rather than the slower MatSetValues().
3789     */
3790     M->was_assembled = PETSC_TRUE;
3791     M->assembled     = PETSC_FALSE;
3792   }
3793 
3794   /* (5) Set values of Msub to *newmat */
3795   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3796   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3797 
3798   jj   = aij->j;
3799   aa   = aij->a;
3800   for (i=0; i<m; i++) {
3801     row = rstart + i;
3802     nz  = ii[i+1] - ii[i];
3803     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3804     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3805     jj += nz; aa += nz;
3806   }
3807   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3808 
3809   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811 
3812   ierr = PetscFree(colsub);CHKERRQ(ierr);
3813 
3814   /* save Msub, iscol_sub and iscmap used in processor for next request */
3815   if (call ==  MAT_INITIAL_MATRIX) {
3816     *newmat = M;
3817     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3818     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3819 
3820     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3821     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3822 
3823     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3824     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3825 
3826     if (iscol_local) {
3827       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3828       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3829     }
3830   }
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 /*
3835     Not great since it makes two copies of the submatrix: first a SeqAIJ
3836   locally, and then the end result by concatenating the local matrices.
3837   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3838 
3839   Note: This requires a sequential iscol with all indices.
3840 */
3841 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3842 {
3843   PetscErrorCode ierr;
3844   PetscMPIInt    rank,size;
3845   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3846   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3847   Mat            M,Mreuse;
3848   MatScalar      *aa,*vwork;
3849   MPI_Comm       comm;
3850   Mat_SeqAIJ     *aij;
3851   PetscBool      colflag,allcolumns=PETSC_FALSE;
3852 
3853   PetscFunctionBegin;
3854   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3855   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3856   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3857 
3858   /* Check for special case: each processor gets entire matrix columns */
3859   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3860   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3861   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3862 
3863   if (call ==  MAT_REUSE_MATRIX) {
3864     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3865     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3866     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3867   } else {
3868     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3869   }
3870 
3871   /*
3872       m - number of local rows
3873       n - number of columns (same on all processors)
3874       rstart - first row in new global matrix generated
3875   */
3876   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3877   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3878   if (call == MAT_INITIAL_MATRIX) {
3879     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3880     ii  = aij->i;
3881     jj  = aij->j;
3882 
3883     /*
3884         Determine the number of non-zeros in the diagonal and off-diagonal
3885         portions of the matrix in order to do correct preallocation
3886     */
3887 
3888     /* first get start and end of "diagonal" columns */
3889     if (csize == PETSC_DECIDE) {
3890       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3891       if (mglobal == n) { /* square matrix */
3892         nlocal = m;
3893       } else {
3894         nlocal = n/size + ((n % size) > rank);
3895       }
3896     } else {
3897       nlocal = csize;
3898     }
3899     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3900     rstart = rend - nlocal;
3901     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3902 
3903     /* next, compute all the lengths */
3904     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3905     olens = dlens + m;
3906     for (i=0; i<m; i++) {
3907       jend = ii[i+1] - ii[i];
3908       olen = 0;
3909       dlen = 0;
3910       for (j=0; j<jend; j++) {
3911         if (*jj < rstart || *jj >= rend) olen++;
3912         else dlen++;
3913         jj++;
3914       }
3915       olens[i] = olen;
3916       dlens[i] = dlen;
3917     }
3918     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3919     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3920     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3921     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3922     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3923     ierr = PetscFree(dlens);CHKERRQ(ierr);
3924   } else {
3925     PetscInt ml,nl;
3926 
3927     M    = *newmat;
3928     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3929     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3930     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3931     /*
3932          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3933        rather than the slower MatSetValues().
3934     */
3935     M->was_assembled = PETSC_TRUE;
3936     M->assembled     = PETSC_FALSE;
3937   }
3938   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3939   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3940   ii   = aij->i;
3941   jj   = aij->j;
3942   aa   = aij->a;
3943   for (i=0; i<m; i++) {
3944     row   = rstart + i;
3945     nz    = ii[i+1] - ii[i];
3946     cwork = jj;     jj += nz;
3947     vwork = aa;     aa += nz;
3948     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3949   }
3950 
3951   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3952   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   *newmat = M;
3954 
3955   /* save submatrix used in processor for next request */
3956   if (call ==  MAT_INITIAL_MATRIX) {
3957     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3958     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3959   }
3960   PetscFunctionReturn(0);
3961 }
3962 
3963 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3964 {
3965   PetscInt       m,cstart, cend,j,nnz,i,d;
3966   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3967   const PetscInt *JJ;
3968   PetscErrorCode ierr;
3969   PetscBool      nooffprocentries;
3970 
3971   PetscFunctionBegin;
3972   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3973 
3974   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3975   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3976   m      = B->rmap->n;
3977   cstart = B->cmap->rstart;
3978   cend   = B->cmap->rend;
3979   rstart = B->rmap->rstart;
3980 
3981   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3982 
3983 #if defined(PETSC_USE_DEBUG)
3984   for (i=0; i<m; i++) {
3985     nnz = Ii[i+1]- Ii[i];
3986     JJ  = J + Ii[i];
3987     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3988     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3989     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3990   }
3991 #endif
3992 
3993   for (i=0; i<m; i++) {
3994     nnz     = Ii[i+1]- Ii[i];
3995     JJ      = J + Ii[i];
3996     nnz_max = PetscMax(nnz_max,nnz);
3997     d       = 0;
3998     for (j=0; j<nnz; j++) {
3999       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4000     }
4001     d_nnz[i] = d;
4002     o_nnz[i] = nnz - d;
4003   }
4004   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4005   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4006 
4007   for (i=0; i<m; i++) {
4008     ii   = i + rstart;
4009     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4010   }
4011   nooffprocentries    = B->nooffprocentries;
4012   B->nooffprocentries = PETSC_TRUE;
4013   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4014   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4015   B->nooffprocentries = nooffprocentries;
4016 
4017   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4018   PetscFunctionReturn(0);
4019 }
4020 
4021 /*@
4022    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4023    (the default parallel PETSc format).
4024 
4025    Collective
4026 
4027    Input Parameters:
4028 +  B - the matrix
4029 .  i - the indices into j for the start of each local row (starts with zero)
4030 .  j - the column indices for each local row (starts with zero)
4031 -  v - optional values in the matrix
4032 
4033    Level: developer
4034 
4035    Notes:
4036        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4037      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4038      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4039 
4040        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4041 
4042        The format used for the sparse matrix input is equivalent to a
4043     row-major ordering, i.e. for the following matrix, the input data expected is
4044     as shown below:
4045 
4046 $        1 0 0
4047 $        2 0 3     P0
4048 $       -------
4049 $        4 5 6     P1
4050 $
4051 $     Process0 [P0]: rows_owned=[0,1]
4052 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4053 $        j =  {0,0,2}  [size = 3]
4054 $        v =  {1,2,3}  [size = 3]
4055 $
4056 $     Process1 [P1]: rows_owned=[2]
4057 $        i =  {0,3}    [size = nrow+1  = 1+1]
4058 $        j =  {0,1,2}  [size = 3]
4059 $        v =  {4,5,6}  [size = 3]
4060 
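       A minimal usage sketch for the example above (an illustration, not taken from the PETSc
     examples; it assumes PETSc has been initialized and exactly 2 MPI ranks are used):

$     Mat               A;
$     PetscErrorCode    ierr;
$     PetscMPIInt       rank;
$     const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
$     const PetscScalar v0[] = {1,2,3};
$     const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
$     const PetscScalar v1[] = {4,5,6};
$
$     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
$     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
$     ierr = MatSetSizes(A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
$     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
$     ierr = MatMPIAIJSetPreallocationCSR(A,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
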
4061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4062           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4063 @*/
4064 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4065 {
4066   PetscErrorCode ierr;
4067 
4068   PetscFunctionBegin;
4069   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4070   PetscFunctionReturn(0);
4071 }
4072 
4073 /*@C
4074    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4075    (the default parallel PETSc format).  For good matrix assembly performance
4076    the user should preallocate the matrix storage by setting the parameters
4077    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4078    performance can be increased by more than a factor of 50.
4079 
4080    Collective
4081 
4082    Input Parameters:
4083 +  B - the matrix
4084 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4085            (same value is used for all local rows)
4086 .  d_nnz - array containing the number of nonzeros in the various rows of the
4087            DIAGONAL portion of the local submatrix (possibly different for each row)
4088            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4089            The size of this array is equal to the number of local rows, i.e 'm'.
4090            For matrices that will be factored, you must leave room for (and set)
4091            the diagonal entry even if it is zero.
4092 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4093            submatrix (same value is used for all local rows).
4094 -  o_nnz - array containing the number of nonzeros in the various rows of the
4095            OFF-DIAGONAL portion of the local submatrix (possibly different for
4096            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4097            structure. The size of this array is equal to the number
4098            of local rows, i.e 'm'.
4099 
4100    If the *_nnz parameter is given then the *_nz parameter is ignored
4101 
4102    The AIJ format (also called the Yale sparse matrix format or
4103    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4104    storage.  The stored row and column indices begin with zero.
4105    See Users-Manual: ch_mat for details.
4106 
4107    The parallel matrix is partitioned such that the first m0 rows belong to
4108    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4109    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4110 
4111    The DIAGONAL portion of the local submatrix of a processor can be defined
4112    as the submatrix which is obtained by extracting the part corresponding to
4113    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4114    first row that belongs to the processor, r2 is the last row belonging to
4115    this processor, and c1-c2 is the range of indices of the local part of a
4116    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4117    common case of a square matrix, the row and column ranges are the same and
4118    the DIAGONAL part is also square. The remaining portion of the local
4119    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4120 
4121    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4122 
4123    You can call MatGetInfo() to get information on how effective the preallocation was;
4124    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4125    You can also run with the option -info and look for messages with the string
4126    malloc in them to see if additional memory allocation was needed.
4127 
4128    Example usage:
4129 
4130    Consider the following 8x8 matrix with 34 non-zero values, that is
4131    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4132    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4133    as follows:
4134 
4135 .vb
4136             1  2  0  |  0  3  0  |  0  4
4137     Proc0   0  5  6  |  7  0  0  |  8  0
4138             9  0 10  | 11  0  0  | 12  0
4139     -------------------------------------
4140            13  0 14  | 15 16 17  |  0  0
4141     Proc1   0 18  0  | 19 20 21  |  0  0
4142             0  0  0  | 22 23  0  | 24  0
4143     -------------------------------------
4144     Proc2  25 26 27  |  0  0 28  | 29  0
4145            30  0  0  | 31 32 33  |  0 34
4146 .ve
4147 
4148    This can be represented as a collection of submatrices as:
4149 
4150 .vb
4151       A B C
4152       D E F
4153       G H I
4154 .ve
4155 
4156    Where the submatrices A,B,C are owned by proc0, D,E,F are
4157    owned by proc1, G,H,I are owned by proc2.
4158 
4159    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4160    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4161    The 'M','N' parameters are 8,8, and have the same values on all procs.
4162 
4163    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4164    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4165    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4166    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4167    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4168    matrix, and [DF] as another SeqAIJ matrix.
4169 
4170    When d_nz, o_nz parameters are specified, d_nz storage elements are
4171    allocated for every row of the local diagonal submatrix, and o_nz
4172    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4173    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4174    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4175    In this case, the values of d_nz,o_nz are:
4176 .vb
4177      proc0 : dnz = 2, o_nz = 2
4178      proc1 : dnz = 3, o_nz = 2
4179      proc2 : dnz = 1, o_nz = 4
4180 .ve
4181    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4182    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4183    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4184    34 values.
4185 
4186    When d_nnz, o_nnz parameters are specified, the storage is specified
4187    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4188    In the above case the values for d_nnz,o_nnz are:
4189 .vb
4190      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4191      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4192      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4193 .ve
4194    Here the space allocated is sum of all the above values i.e 34, and
4195    hence pre-allocation is perfect.
4196 
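   As a minimal sketch (an illustration, not taken from the PETSc examples), proc0 of the example
   above could create and preallocate its share of the matrix as follows; the other processes make
   the same calls with their own local sizes and counts:

$     Mat            A;
$     PetscErrorCode ierr;
$     PetscInt       d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
$
$     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
$     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
$     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
$     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   after which the entries are inserted with MatSetValues() and the matrix is assembled with
   MatAssemblyBegin()/MatAssemblyEnd().
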
4197    Level: intermediate
4198 
4199 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4200           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4201 @*/
4202 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4203 {
4204   PetscErrorCode ierr;
4205 
4206   PetscFunctionBegin;
4207   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4208   PetscValidType(B,1);
4209   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 /*@
4214      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4215          rows in standard CSR format.
4216 
4217    Collective
4218 
4219    Input Parameters:
4220 +  comm - MPI communicator
4221 .  m - number of local rows (Cannot be PETSC_DECIDE)
4222 .  n - This value should be the same as the local size used in creating the
4223        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4224        calculated if N is given) For square matrices n is almost always m.
4225 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4226 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4227 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4228 .   j - column indices
4229 -   a - matrix values
4230 
4231    Output Parameter:
4232 .   mat - the matrix
4233 
4234    Level: intermediate
4235 
4236    Notes:
4237        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4238      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4239      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4240 
4241        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4242 
4243        The format used for the sparse matrix input is equivalent to a
4244     row-major ordering, i.e. for the following matrix, the input data expected is
4245     as shown below.
4246 
4247        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4248 
4249 $        1 0 0
4250 $        2 0 3     P0
4251 $       -------
4252 $        4 5 6     P1
4253 $
4254 $     Process0 [P0]: rows_owned=[0,1]
4255 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4256 $        j =  {0,0,2}  [size = 3]
4257 $        v =  {1,2,3}  [size = 3]
4258 $
4259 $     Process1 [P1]: rows_owned=[2]
4260 $        i =  {0,3}    [size = nrow+1  = 1+1]
4261 $        j =  {0,1,2}  [size = 3]
4262 $        v =  {4,5,6}  [size = 3]
4263 
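       A minimal sketch (an illustration, not taken from the PETSc examples) of the call made by
     process 0 of the example above; process 1 would pass its own arrays with m = 1:

$     Mat               A;
$     PetscErrorCode    ierr;
$     const PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     const PetscScalar v[] = {1,2,3};
$
$     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
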
4264 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4265           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4266 @*/
4267 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4268 {
4269   PetscErrorCode ierr;
4270 
4271   PetscFunctionBegin;
4272   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4273   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4274   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4275   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4276   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4277   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4278   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4279   PetscFunctionReturn(0);
4280 }
4281 
4282 /*@
4283      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4284          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  mat - the matrix
4290 .  m - number of local rows (Cannot be PETSC_DECIDE)
4291 .  n - This value should be the same as the local size used in creating the
4292        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4293        calculated if N is given) For square matrices n is almost always m.
4294 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4295 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4296 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4297 .  J - column indices
4298 -  v - matrix values
4299 
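   Notes:
       A minimal sketch (an illustration, not taken from the PETSc examples): assuming mat is the
     matrix built by process 0 in the MatCreateMPIAIJWithArrays() example, with the same index
     arrays i and j, new numerical values are supplied as

$     PetscErrorCode    ierr;
$     PetscInt          mloc,nloc;
$     const PetscScalar vnew[] = {10,20,30};
$
$     ierr = MatGetLocalSize(mat,&mloc,&nloc);CHKERRQ(ierr);
$     ierr = MatUpdateMPIAIJWithArrays(mat,mloc,nloc,3,3,i,j,vnew);CHKERRQ(ierr);

     where 3,3 are the global sizes used when the matrix was created and vnew has the same length
     and ordering as the original v.
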
4300    Level: intermediate
4301 
4302 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4303           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4304 @*/
4305 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4306 {
4307   PetscErrorCode ierr;
4308   PetscInt       cstart,nnz,i,j;
4309   PetscInt       *ld;
4310   PetscBool      nooffprocentries;
4311   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4312   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4313   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4314   const PetscInt *Adi = Ad->i;
4315   PetscInt       ldi,Iii,md;
4316 
4317   PetscFunctionBegin;
4318   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4319   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4320   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4321   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4322 
4323   cstart = mat->cmap->rstart;
4324   if (!Aij->ld) {
4325     /* count number of entries below block diagonal */
4326     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4327     Aij->ld = ld;
4328     for (i=0; i<m; i++) {
4329       nnz  = Ii[i+1]- Ii[i];
4330       j     = 0;
4331       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] */
4332       J    += nnz;
4333       ld[i] = j;
4334     }
4335   } else {
4336     ld = Aij->ld;
4337   }
4338 
4339   for (i=0; i<m; i++) {
4340     nnz  = Ii[i+1]- Ii[i];
4341     Iii  = Ii[i];
4342     ldi  = ld[i];
4343     md   = Adi[i+1]-Adi[i];
4344     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4345     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4346     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4347     ad  += md;
4348     ao  += nnz - md;
4349   }
4350   nooffprocentries      = mat->nooffprocentries;
4351   mat->nooffprocentries = PETSC_TRUE;
4352   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4353   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4354   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4355   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4356   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   mat->nooffprocentries = nooffprocentries;
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 /*@C
4362    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4363    (the default parallel PETSc format).  For good matrix assembly performance
4364    the user should preallocate the matrix storage by setting the parameters
4365    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4366    performance can be increased by more than a factor of 50.
4367 
4368    Collective
4369 
4370    Input Parameters:
4371 +  comm - MPI communicator
4372 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4373            This value should be the same as the local size used in creating the
4374            y vector for the matrix-vector product y = Ax.
4375 .  n - This value should be the same as the local size used in creating the
4376        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4377        calculated if N is given) For square matrices n is almost always m.
4378 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4379 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4380 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4381            (same value is used for all local rows)
4382 .  d_nnz - array containing the number of nonzeros in the various rows of the
4383            DIAGONAL portion of the local submatrix (possibly different for each row)
4384            or NULL, if d_nz is used to specify the nonzero structure.
4385            The size of this array is equal to the number of local rows, i.e 'm'.
4386 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4387            submatrix (same value is used for all local rows).
4388 -  o_nnz - array containing the number of nonzeros in the various rows of the
4389            OFF-DIAGONAL portion of the local submatrix (possibly different for
4390            each row) or NULL, if o_nz is used to specify the nonzero
4391            structure. The size of this array is equal to the number
4392            of local rows, i.e 'm'.
4393 
4394    Output Parameter:
4395 .  A - the matrix
4396 
4397    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4398    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4399    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4400 
4401    Notes:
4402    If the *_nnz parameter is given then the *_nz parameter is ignored
4403 
4404    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4405    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4406    storage requirements for this matrix.
4407 
4408    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4409    processor then it must be used on all processors that share the object for
4410    that argument.
4411 
4412    The user MUST specify either the local or global matrix dimensions
4413    (possibly both).
4414 
4415    The parallel matrix is partitioned across processors such that the
4416    first m0 rows belong to process 0, the next m1 rows belong to
4417    process 1, the next m2 rows belong to process 2, etc., where
4418    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4419    values corresponding to an [m x N] submatrix.
4420 
4421    The columns are logically partitioned with the n0 columns belonging
4422    to 0th partition, the next n1 columns belonging to the next
4423    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4424 
4425    The DIAGONAL portion of the local submatrix on any given processor
4426    is the submatrix corresponding to the rows and columns m,n
4427    corresponding to the given processor, i.e. the diagonal matrix on
4428    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4429    etc. The remaining portion of the local submatrix [m x (N-n)]
4430    constitute the OFF-DIAGONAL portion. The example below better
4431    illustrates this concept.
4432 
4433    For a square global matrix we define each processor's diagonal portion
4434    to be its local rows and the corresponding columns (a square submatrix);
4435    each processor's off-diagonal portion encompasses the remainder of the
4436    local matrix (a rectangular submatrix).
4437 
4438    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4439 
4440    When calling this routine with a single process communicator, a matrix of
4441    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4442    type of communicator, use the construction mechanism
4443 .vb
4444      MatCreate(...,&A);
4445      MatSetType(A,MATMPIAIJ);
4446      MatSetSizes(A, m,n,M,N);
4447      MatMPIAIJSetPreallocation(A,...);
4448 .ve
4451 
4452    By default, this format uses inodes (identical nodes) when possible.
4453    We search for consecutive rows with the same nonzero structure, thereby
4454    reusing matrix information to achieve increased efficiency.
4455 
4456    Options Database Keys:
4457 +  -mat_no_inode  - Do not use inodes
4458 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4459 
4462    Example usage:
4463 
4464    Consider the following 8x8 matrix with 34 non-zero values, that is
4465    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4466    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4467    as follows
4468 
4469 .vb
4470             1  2  0  |  0  3  0  |  0  4
4471     Proc0   0  5  6  |  7  0  0  |  8  0
4472             9  0 10  | 11  0  0  | 12  0
4473     -------------------------------------
4474            13  0 14  | 15 16 17  |  0  0
4475     Proc1   0 18  0  | 19 20 21  |  0  0
4476             0  0  0  | 22 23  0  | 24  0
4477     -------------------------------------
4478     Proc2  25 26 27  |  0  0 28  | 29  0
4479            30  0  0  | 31 32 33  |  0 34
4480 .ve
4481 
4482    This can be represented as a collection of submatrices as
4483 
4484 .vb
4485       A B C
4486       D E F
4487       G H I
4488 .ve
4489 
4490    Where the submatrices A,B,C are owned by proc0, D,E,F are
4491    owned by proc1, G,H,I are owned by proc2.
4492 
4493    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4494    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4495    The 'M','N' parameters are 8,8, and have the same values on all procs.
4496 
4497    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4498    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4499    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4500    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4501    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4502    matrix, and [DF] as another SeqAIJ matrix.
4503 
4504    When d_nz, o_nz parameters are specified, d_nz storage elements are
4505    allocated for every row of the local diagonal submatrix, and o_nz
4506    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4507    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4508    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4509    In this case, the values of d_nz,o_nz are
4510 .vb
4511      proc0 : dnz = 2, o_nz = 2
4512      proc1 : dnz = 3, o_nz = 2
4513      proc2 : dnz = 1, o_nz = 4
4514 .ve
4515    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4516    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4517    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4518    34 values.
4519 
4520    When d_nnz, o_nnz parameters are specified, the storage is specified
4521    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4522    In the above case the values for d_nnz,o_nnz are
4523 .vb
4524      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4525      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4526      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4527 .ve
4528    Here the space allocated is sum of all the above values i.e 34, and
4529    hence pre-allocation is perfect.
4530 
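   As a minimal sketch (an illustration, not taken from the PETSc examples), proc0 of the example
   above could create and preallocate its share of the matrix with the single call

$     Mat            A;
$     PetscErrorCode ierr;
$
$     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);CHKERRQ(ierr);

   using its local sizes (3 rows and columns of the 8x8 matrix) and d_nz = 2, o_nz = 2; the entries
   are then inserted with MatSetValues() and the matrix assembled with MatAssemblyBegin()/MatAssemblyEnd().
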
4531    Level: intermediate
4532 
4533 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4534           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4535 @*/
4536 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4537 {
4538   PetscErrorCode ierr;
4539   PetscMPIInt    size;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4543   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4545   if (size > 1) {
4546     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4547     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4548   } else {
4549     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4550     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4551   }
4552   PetscFunctionReturn(0);
4553 }
4554 
4555 /*@C
4556   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4557 
4558   Not collective
4559 
4560   Input Parameter:
4561 . A - The MPIAIJ matrix
4562 
4563   Output Parameters:
4564 + Ad - The local diagonal block as a SeqAIJ matrix
4565 . Ao - The local off-diagonal block as a SeqAIJ matrix
4566 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4567 
4568   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4569   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4570   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4571   local column numbers to global column numbers in the original matrix.
4572 
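  A minimal sketch (an illustration, not taken from the PETSc examples) that maps the first entry of
  Ao's local row 0 back to its global column, assuming A is a MATMPIAIJ matrix with at least one
  local row:

$    Mat            Ad,Ao;
$    const PetscInt *colmap,*cols;
$    PetscInt       ncols,gcol = -1;
$    PetscErrorCode ierr;
$
$    ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
$    ierr = MatGetRow(Ao,0,&ncols,&cols,NULL);CHKERRQ(ierr);
$    if (ncols) gcol = colmap[cols[0]];
$    ierr = MatRestoreRow(Ao,0,&ncols,&cols,NULL);CHKERRQ(ierr);
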
4573   Level: intermediate
4574 
4575 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4576 @*/
4577 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4578 {
4579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4580   PetscBool      flg;
4581   PetscErrorCode ierr;
4582 
4583   PetscFunctionBegin;
4584   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4585   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4586   if (Ad)     *Ad     = a->A;
4587   if (Ao)     *Ao     = a->B;
4588   if (colmap) *colmap = a->garray;
4589   PetscFunctionReturn(0);
4590 }
4591 
4592 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4593 {
4594   PetscErrorCode ierr;
4595   PetscInt       m,N,i,rstart,nnz,Ii;
4596   PetscInt       *indx;
4597   PetscScalar    *values;
4598 
4599   PetscFunctionBegin;
4600   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4601   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4602     PetscInt       *dnz,*onz,sum,bs,cbs;
4603 
4604     if (n == PETSC_DECIDE) {
4605       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4606     }
4607     /* Check sum(n) = N */
4608     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4609     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4610 
4611     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4612     rstart -= m;
4613 
4614     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4615     for (i=0; i<m; i++) {
4616       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4617       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4618       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4619     }
4620 
4621     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4622     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4623     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4624     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4625     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4626     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4627     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4628     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4629   }
4630 
4631   /* numeric phase */
4632   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4633   for (i=0; i<m; i++) {
4634     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4635     Ii   = i + rstart;
4636     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4637     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4638   }
4639   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4640   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4645 {
4646   PetscErrorCode    ierr;
4647   PetscMPIInt       rank;
4648   PetscInt          m,N,i,rstart,nnz;
4649   size_t            len;
4650   const PetscInt    *indx;
4651   PetscViewer       out;
4652   char              *name;
4653   Mat               B;
4654   const PetscScalar *values;
4655 
4656   PetscFunctionBegin;
4657   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4658   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4659   /* Should this be the type of the diagonal block of A? */
4660   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4661   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4662   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4663   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4664   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4665   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4666   for (i=0; i<m; i++) {
4667     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4668     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4669     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4670   }
4671   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4672   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4673 
4674   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4675   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4676   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4677   sprintf(name,"%s.%d",outfile,rank);
4678   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4679   ierr = PetscFree(name);CHKERRQ(ierr);
4680   ierr = MatView(B,out);CHKERRQ(ierr);
4681   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4682   ierr = MatDestroy(&B);CHKERRQ(ierr);
4683   PetscFunctionReturn(0);
4684 }
4685 
4686 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4687 {
4688   PetscErrorCode      ierr;
4689   Mat_Merge_SeqsToMPI *merge;
4690   PetscContainer      container;
4691 
4692   PetscFunctionBegin;
4693   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4694   if (container) {
4695     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4696     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4697     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4698     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4699     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4700     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4701     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4702     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4703     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4704     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4705     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4706     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4707     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4708     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4709     ierr = PetscFree(merge);CHKERRQ(ierr);
4710     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4711   }
4712   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4713   PetscFunctionReturn(0);
4714 }
4715 
4716 #include <../src/mat/utils/freespace.h>
4717 #include <petscbt.h>
4718 
4719 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4720 {
4721   PetscErrorCode      ierr;
4722   MPI_Comm            comm;
4723   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4724   PetscMPIInt         size,rank,taga,*len_s;
4725   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4726   PetscInt            proc,m;
4727   PetscInt            **buf_ri,**buf_rj;
4728   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4729   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4730   MPI_Request         *s_waits,*r_waits;
4731   MPI_Status          *status;
4732   MatScalar           *aa=a->a;
4733   MatScalar           **abuf_r,*ba_i;
4734   Mat_Merge_SeqsToMPI *merge;
4735   PetscContainer      container;
4736 
4737   PetscFunctionBegin;
4738   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4739   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4740 
4741   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4742   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4743 
4744   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4745   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4746 
4747   bi     = merge->bi;
4748   bj     = merge->bj;
4749   buf_ri = merge->buf_ri;
4750   buf_rj = merge->buf_rj;
4751 
4752   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4753   owners = merge->rowmap->range;
4754   len_s  = merge->len_s;
4755 
4756   /* send and recv matrix values */
4757   /*-----------------------------*/
4758   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4759   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4760 
4761   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4762   for (proc=0,k=0; proc<size; proc++) {
4763     if (!len_s[proc]) continue;
4764     i    = owners[proc];
4765     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4766     k++;
4767   }
4768 
4769   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4770   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4771   ierr = PetscFree(status);CHKERRQ(ierr);
4772 
4773   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4775 
4776   /* insert mat values of mpimat */
4777   /*----------------------------*/
4778   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4779   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4780 
4781   for (k=0; k<merge->nrecv; k++) {
4782     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4783     nrows       = *(buf_ri_k[k]);
4784     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4785     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4786   }
4787 
4788   /* set values of ba */
4789   m = merge->rowmap->n;
4790   for (i=0; i<m; i++) {
4791     arow = owners[rank] + i;
4792     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4793     bnzi = bi[i+1] - bi[i];
4794     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4795 
4796     /* add local non-zero vals of this proc's seqmat into ba */
4797     anzi   = ai[arow+1] - ai[arow];
4798     aj     = a->j + ai[arow];
4799     aa     = a->a + ai[arow];
4800     nextaj = 0;
4801     for (j=0; nextaj<anzi; j++) {
4802       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4803         ba_i[j] += aa[nextaj++];
4804       }
4805     }
4806 
4807     /* add received vals into ba */
4808     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4809       /* i-th row */
4810       if (i == *nextrow[k]) {
4811         anzi   = *(nextai[k]+1) - *nextai[k];
4812         aj     = buf_rj[k] + *(nextai[k]);
4813         aa     = abuf_r[k] + *(nextai[k]);
4814         nextaj = 0;
4815         for (j=0; nextaj<anzi; j++) {
4816           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4817             ba_i[j] += aa[nextaj++];
4818           }
4819         }
4820         nextrow[k]++; nextai[k]++;
4821       }
4822     }
4823     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4824   }
4825   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4826   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4827 
4828   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4829   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4830   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4831   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4832   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4833   PetscFunctionReturn(0);
4834 }
4835 
4836 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4837 {
4838   PetscErrorCode      ierr;
4839   Mat                 B_mpi;
4840   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4841   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4842   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4843   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4844   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4845   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4846   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4847   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4848   MPI_Status          *status;
4849   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4850   PetscBT             lnkbt;
4851   Mat_Merge_SeqsToMPI *merge;
4852   PetscContainer      container;
4853 
4854   PetscFunctionBegin;
4855   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4856 
4857   /* make sure it is a PETSc comm */
4858   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4859   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4860   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4861 
4862   ierr = PetscNew(&merge);CHKERRQ(ierr);
4863   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4864 
4865   /* determine row ownership */
4866   /*---------------------------------------------------------*/
4867   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4868   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4869   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4870   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4871   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4872   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4873   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4874 
4875   m      = merge->rowmap->n;
4876   owners = merge->rowmap->range;
4877 
4878   /* determine the number of messages to send, their lengths */
4879   /*---------------------------------------------------------*/
4880   len_s = merge->len_s;
4881 
4882   len          = 0; /* length of buf_si[] */
4883   merge->nsend = 0;
4884   for (proc=0; proc<size; proc++) {
4885     len_si[proc] = 0;
4886     if (proc == rank) {
4887       len_s[proc] = 0;
4888     } else {
4889       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4890       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4891     }
4892     if (len_s[proc]) {
4893       merge->nsend++;
4894       nrows = 0;
4895       for (i=owners[proc]; i<owners[proc+1]; i++) {
4896         if (ai[i+1] > ai[i]) nrows++;
4897       }
4898       len_si[proc] = 2*(nrows+1);
4899       len         += len_si[proc];
4900     }
4901   }
4902 
4903   /* determine the number and length of messages to receive for ij-structure */
4904   /*-------------------------------------------------------------------------*/
4905   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4906   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4907 
4908   /* post the Irecv of j-structure */
4909   /*-------------------------------*/
4910   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4911   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4912 
4913   /* post the Isend of j-structure */
4914   /*--------------------------------*/
4915   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4916 
4917   for (proc=0, k=0; proc<size; proc++) {
4918     if (!len_s[proc]) continue;
4919     i    = owners[proc];
4920     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4921     k++;
4922   }
4923 
4924   /* receives and sends of j-structure are complete */
4925   /*------------------------------------------------*/
4926   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4927   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4928 
4929   /* send and recv i-structure */
4930   /*---------------------------*/
4931   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4932   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4933 
4934   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4935   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4936   for (proc=0,k=0; proc<size; proc++) {
4937     if (!len_s[proc]) continue;
4938     /* form outgoing message for i-structure:
4939          buf_si[0]:                 nrows to be sent
4940                [1:nrows]:           row index (local to the destination process)
4941                [nrows+1:2*nrows+1]: i-structure index
4942     */
4943     /*-------------------------------------------*/
4944     nrows       = len_si[proc]/2 - 1;
4945     buf_si_i    = buf_si + nrows+1;
4946     buf_si[0]   = nrows;
4947     buf_si_i[0] = 0;
4948     nrows       = 0;
4949     for (i=owners[proc]; i<owners[proc+1]; i++) {
4950       anzi = ai[i+1] - ai[i];
4951       if (anzi) {
4952         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4953         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4954         nrows++;
4955       }
4956     }
4957     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4958     k++;
4959     buf_si += len_si[proc];
4960   }
4961 
4962   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4963   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4964 
4965   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4966   for (i=0; i<merge->nrecv; i++) {
4967     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4968   }
4969 
4970   ierr = PetscFree(len_si);CHKERRQ(ierr);
4971   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4972   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4973   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4974   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4975   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4976   ierr = PetscFree(status);CHKERRQ(ierr);
4977 
4978   /* compute a local seq matrix in each processor */
4979   /*----------------------------------------------*/
4980   /* allocate bi array and free space for accumulating nonzero column info */
4981   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4982   bi[0] = 0;
4983 
4984   /* create and initialize a linked list */
4985   nlnk = N+1;
4986   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4987 
4988   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4989   len  = ai[owners[rank+1]] - ai[owners[rank]];
4990   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4991 
4992   current_space = free_space;
4993 
4994   /* determine symbolic info for each local row */
4995   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4996 
4997   for (k=0; k<merge->nrecv; k++) {
4998     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4999     nrows       = *buf_ri_k[k];
5000     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
5001     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
5002   }
5003 
5004   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5005   len  = 0;
5006   for (i=0; i<m; i++) {
5007     bnzi = 0;
5008     /* add local non-zero cols of this proc's seqmat into lnk */
5009     arow  = owners[rank] + i;
5010     anzi  = ai[arow+1] - ai[arow];
5011     aj    = a->j + ai[arow];
5012     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5013     bnzi += nlnk;
5014     /* add received col data into lnk */
5015     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5016       if (i == *nextrow[k]) { /* i-th row */
5017         anzi  = *(nextai[k]+1) - *nextai[k];
5018         aj    = buf_rj[k] + *nextai[k];
5019         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5020         bnzi += nlnk;
5021         nextrow[k]++; nextai[k]++;
5022       }
5023     }
5024     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5025 
5026     /* if free space is not available, make more free space */
5027     if (current_space->local_remaining<bnzi) {
5028       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5029       nspacedouble++;
5030     }
5031     /* copy data into free space, then initialize lnk */
5032     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5033     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5034 
5035     current_space->array           += bnzi;
5036     current_space->local_used      += bnzi;
5037     current_space->local_remaining -= bnzi;
5038 
5039     bi[i+1] = bi[i] + bnzi;
5040   }
5041 
5042   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5043 
5044   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5045   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5046   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5047 
5048   /* create symbolic parallel matrix B_mpi */
5049   /*---------------------------------------*/
5050   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5051   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5052   if (n==PETSC_DECIDE) {
5053     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5054   } else {
5055     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5056   }
5057   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5058   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5059   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5060   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5061   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5062 
5063   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5064   B_mpi->assembled    = PETSC_FALSE;
5065   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5066   merge->bi           = bi;
5067   merge->bj           = bj;
5068   merge->buf_ri       = buf_ri;
5069   merge->buf_rj       = buf_rj;
5070   merge->coi          = NULL;
5071   merge->coj          = NULL;
5072   merge->owners_co    = NULL;
5073 
5074   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5075 
5076   /* attach the supporting struct to B_mpi for reuse */
5077   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5078   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5079   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5080   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5081   *mpimat = B_mpi;
5082 
5083   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5084   PetscFunctionReturn(0);
5085 }
5086 
5087 /*@C
5088       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5089                  matrices from each processor
5090 
5091     Collective
5092 
5093    Input Parameters:
5094 +    comm - the communicator the parallel matrix will live on
5095 .    seqmat - the input sequential matrix
5096 .    m - number of local rows (or PETSC_DECIDE)
5097 .    n - number of local columns (or PETSC_DECIDE)
5098 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5099 
5100    Output Parameter:
5101 .    mpimat - the parallel matrix generated
5102 
5103     Level: advanced
5104 
5105    Notes:
5106      The dimensions of the sequential matrix in each processor MUST be the same.
5107      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
5108      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5109 @*/
5110 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5111 {
5112   PetscErrorCode ierr;
5113   PetscMPIInt    size;
5114 
5115   PetscFunctionBegin;
5116   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5117   if (size == 1) {
5118     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5119     if (scall == MAT_INITIAL_MATRIX) {
5120       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5121     } else {
5122       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5123     }
5124     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5125     PetscFunctionReturn(0);
5126   }
5127   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5128   if (scall == MAT_INITIAL_MATRIX) {
5129     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5130   }
5131   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5132   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5133   PetscFunctionReturn(0);
5134 }
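/* Editorial usage sketch (not part of the original source): each rank assembles its own SeqAIJ
   contribution of the full global size and the contributions are summed into one MATMPIAIJ matrix;
   M, N, and nz below are placeholder sizes/preallocation.

     Mat seqmat,mpimat;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,nz,NULL,&seqmat);CHKERRQ(ierr);
     ... set and assemble this rank's contribution in seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the values in seqmat, keeping the same nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/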
5135 
5136 /*@
5137      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5138           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5139           with MatGetSize()
5140 
5141     Not Collective
5142 
5143    Input Parameters:
5144 +    A - the matrix
5145 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5146 
5147    Output Parameter:
5148 .    A_loc - the local sequential matrix generated
5149 
5150     Level: developer
5151 
5152 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5153 
5154 @*/
5155 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5156 {
5157   PetscErrorCode ierr;
5158   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5159   Mat_SeqAIJ     *mat,*a,*b;
5160   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5161   MatScalar      *aa,*ba,*cam;
5162   PetscScalar    *ca;
5163   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5164   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5165   PetscBool      match;
5166   MPI_Comm       comm;
5167   PetscMPIInt    size;
5168 
5169   PetscFunctionBegin;
5170   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5171   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5172   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5174   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5175 
5176   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5177   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5178   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5179   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5180   aa = a->a; ba = b->a;
5181   if (scall == MAT_INITIAL_MATRIX) {
5182     if (size == 1) {
5183       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5184       PetscFunctionReturn(0);
5185     }
5186 
5187     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5188     ci[0] = 0;
5189     for (i=0; i<am; i++) {
5190       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5191     }
5192     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5193     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5194     k    = 0;
5195     for (i=0; i<am; i++) {
5196       ncols_o = bi[i+1] - bi[i];
5197       ncols_d = ai[i+1] - ai[i];
5198       /* off-diagonal portion of A */
5199       for (jo=0; jo<ncols_o; jo++) {
5200         col = cmap[*bj];
5201         if (col >= cstart) break;
5202         cj[k]   = col; bj++;
5203         ca[k++] = *ba++;
5204       }
5205       /* diagonal portion of A */
5206       for (j=0; j<ncols_d; j++) {
5207         cj[k]   = cstart + *aj++;
5208         ca[k++] = *aa++;
5209       }
5210       /* off-diagonal portion of A */
5211       for (j=jo; j<ncols_o; j++) {
5212         cj[k]   = cmap[*bj++];
5213         ca[k++] = *ba++;
5214       }
5215     }
5216     /* put together the new matrix */
5217     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5218     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5219     /* Since these are PETSc arrays, change flags to free them as necessary. */
5220     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5221     mat->free_a  = PETSC_TRUE;
5222     mat->free_ij = PETSC_TRUE;
5223     mat->nonew   = 0;
5224   } else if (scall == MAT_REUSE_MATRIX) {
5225     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5226     ci = mat->i; cj = mat->j; cam = mat->a;
5227     for (i=0; i<am; i++) {
5228       /* off-diagonal portion of A */
5229       ncols_o = bi[i+1] - bi[i];
5230       for (jo=0; jo<ncols_o; jo++) {
5231         col = cmap[*bj];
5232         if (col >= cstart) break;
5233         *cam++ = *ba++; bj++;
5234       }
5235       /* diagonal portion of A */
5236       ncols_d = ai[i+1] - ai[i];
5237       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5238       /* off-diagonal portion of A */
5239       for (j=jo; j<ncols_o; j++) {
5240         *cam++ = *ba++; bj++;
5241       }
5242     }
5243   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5244   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5245   PetscFunctionReturn(0);
5246 }
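/* Editorial usage sketch (not part of the original source): gather the locally owned rows of a
   parallel matrix A into one sequential matrix whose column indices are global.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with A_loc ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    (refresh values after A changes)
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/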
5247 
5248 /*@C
5249      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5250 
5251     Not Collective
5252 
5253    Input Parameters:
5254 +    A - the matrix
5255 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5256 -    row, col - index sets of rows and columns to extract (or NULL)
5257 
5258    Output Parameter:
5259 .    A_loc - the local sequential matrix generated
5260 
5261     Level: developer
5262 
5263 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5264 
5265 @*/
5266 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5267 {
5268   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5269   PetscErrorCode ierr;
5270   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5271   IS             isrowa,iscola;
5272   Mat            *aloc;
5273   PetscBool      match;
5274 
5275   PetscFunctionBegin;
5276   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5277   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5278   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5279   if (!row) {
5280     start = A->rmap->rstart; end = A->rmap->rend;
5281     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5282   } else {
5283     isrowa = *row;
5284   }
5285   if (!col) {
5286     start = A->cmap->rstart;
5287     cmap  = a->garray;
5288     nzA   = a->A->cmap->n;
5289     nzB   = a->B->cmap->n;
5290     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5291     ncols = 0;
5292     for (i=0; i<nzB; i++) {
5293       if (cmap[i] < start) idx[ncols++] = cmap[i];
5294       else break;
5295     }
5296     imark = i;
5297     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5298     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5299     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5300   } else {
5301     iscola = *col;
5302   }
5303   if (scall != MAT_INITIAL_MATRIX) {
5304     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5305     aloc[0] = *A_loc;
5306   }
5307   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5308   if (!col) { /* attach global id of condensed columns */
5309     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5310   }
5311   *A_loc = aloc[0];
5312   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5313   if (!row) {
5314     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5315   }
5316   if (!col) {
5317     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5318   }
5319   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5320   PetscFunctionReturn(0);
5321 }
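/* Editorial usage sketch (not part of the original source): like MatMPIAIJGetLocalMat(), but the
   columns are condensed to the nonzero ones; passing NULL for row and col lets the routine pick
   the owned rows and the condensed column set.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/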
5322 
5323 /*
5324  * Destroy a matrix that may have PetscSF communication objects composed with it.
5325  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5326  * */
5327 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5328 {
5329   PetscSF          sf,osf;
5330   IS               map;
5331   PetscErrorCode   ierr;
5332 
5333   PetscFunctionBegin;
5334   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5335   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5336   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5337   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5338   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5339   ierr = ISDestroy(&map);CHKERRQ(ierr);
5340   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5341   PetscFunctionReturn(0);
5342 }
5343 
5344 /*
5345  * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once that row is matched.
5346  * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5347  * on a global size.
5348  * */
5349 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5350 {
5351   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5352   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5353   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
5354   PetscSFNode              *iremote,*oiremote;
5355   const PetscInt           *lrowindices;
5356   PetscErrorCode           ierr;
5357   PetscSF                  sf,osf;
5358   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5359   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5360   MPI_Comm                 comm;
5361   ISLocalToGlobalMapping   mapping;
5362 
5363   PetscFunctionBegin;
5364   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5365   /* plocalsize is the number of roots
5366    * nrows is the number of leaves
5367    * */
5368   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5369   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5370   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5371   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5372   for (i=0;i<nrows;i++) {
5373     /* Find a remote index and an owner for a row
5374      * The row could be local or remote
5375      * */
5376     owner = 0;
5377     lidx  = 0;
5378     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5379     iremote[i].index = lidx;
5380     iremote[i].rank  = owner;
5381   }
5382   /* Create SF to communicate how many nonzero columns for each row */
5383   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5384   /* SF will figure out the number of nonzero columns for each row, and their
5385    * offsets
5386    * */
5387   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5388   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5389   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5390 
5391   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5392   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5393   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5394   roffsets[0] = 0;
5395   roffsets[1] = 0;
5396   for (i=0;i<plocalsize;i++) {
5397     /* diag */
5398     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5399     /* off diag */
5400     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5401     /* compute offsets so that we know the relative location of each row */
5402     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5403     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5404   }
5405   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5406   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5407   /* 'r' means root, and 'l' means leaf */
5408   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5409   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5410   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5411   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5412   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5413   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5414   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5415   dntotalcols = 0;
5416   ontotalcols = 0;
5417   ncol = 0;
5418   for (i=0;i<nrows;i++) {
5419     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5420     ncol = PetscMax(pnnz[i],ncol);
5421     /* diag */
5422     dntotalcols += nlcols[i*2+0];
5423     /* off diag */
5424     ontotalcols += nlcols[i*2+1];
5425   }
5426   /* We do not need to figure out the exact number of columns
5427    * since all the calculations will be done by going through the raw data
5428    * */
5429   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5430   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5431   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5432   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5433   /* diag */
5434   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5435   /* off diag */
5436   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5437   /* diag */
5438   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5439   /* off diag */
5440   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5441   dntotalcols = 0;
5442   ontotalcols = 0;
5443   ntotalcols  = 0;
5444   for (i=0;i<nrows;i++) {
5445     owner = 0;
5446     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5447     /* Set iremote for diag matrix */
5448     for (j=0;j<nlcols[i*2+0];j++) {
5449       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5450       iremote[dntotalcols].rank    = owner;
5451       /* P_oth is SeqAIJ, so ilocal needs to point to the beginning of the memory */
5452       ilocal[dntotalcols++]        = ntotalcols++;
5453     }
5454     /* off diag */
5455     for (j=0;j<nlcols[i*2+1];j++) {
5456       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5457       oiremote[ontotalcols].rank    = owner;
5458       oilocal[ontotalcols++]        = ntotalcols++;
5459     }
5460   }
5461   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5462   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5463   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5464   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5465   /* P serves as the roots and P_oth as the leaves
5466    * Diagonal matrix
5467    * */
5468   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5469   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5470   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5471 
5472   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5473   /* Off diag */
5474   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5475   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5476   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5477   /* We operate on the matrix internal data for saving memory */
5478   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5479   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5480   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5481   /* Convert to global indices for diag matrix */
5482   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5483   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5484   /* We want P_oth to store global indices */
5485   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5486   /* Use memory scalable approach */
5487   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5488   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5489   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5490   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5491   /* Convert back to local indices */
5492   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5493   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5494   nout = 0;
5495   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5496   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5497   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5498   /* Exchange values */
5499   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5500   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5501   /* Stop PETSc from shrinking memory */
5502   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5503   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5504   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5505   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5506   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5507   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5508   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5509   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5510   PetscFunctionReturn(0);
5511 }
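/* Editorial note: the routine above is built on the standard PetscSF broadcast pattern, in which
   every leaf receives the root entry it points at. A stripped-down sketch of that pattern, with
   placeholder sizes and data, looks like

     PetscSF     sf;
     PetscSFNode *iremote;                      each leaf stores (rank, index) of its root
     PetscInt    *rootdata,*leafdata;
     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

   The routine uses this pattern three times: for the per-row nonzero counts and offsets, for the
   column indices, and for the numerical values of the requested rows of P.
*/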
5512 
5513 /*
5514  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5515  * This supports MPIAIJ and MAIJ
5516  * */
5517 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5518 {
5519   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5520   Mat_SeqAIJ            *p_oth;
5521   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5522   IS                    rows,map;
5523   PetscHMapI            hamp;
5524   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5525   MPI_Comm              comm;
5526   PetscSF               sf,osf;
5527   PetscBool             has;
5528   PetscErrorCode        ierr;
5529 
5530   PetscFunctionBegin;
5531   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5532   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5533   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5534    *  and then create a submatrix (that often is an overlapping matrix)
5535    * */
5536   if (reuse==MAT_INITIAL_MATRIX) {
5537     /* Use a hash table to figure out unique keys */
5538     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5539     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5540     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5541     count = 0;
5542     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5543     for (i=0;i<a->B->cmap->n;i++) {
5544       key  = a->garray[i]/dof;
5545       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5546       if (!has) {
5547         mapping[i] = count;
5548         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5549       } else {
5550         /* Current 'i' has the same key as the previous step */
5551         mapping[i] = count-1;
5552       }
5553     }
5554     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5555     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5556     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5557     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5558     off = 0;
5559     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5560     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5561     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5562     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5563     /* In case the matrix was already created but the user wants to recreate it */
5564     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5565     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5566     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5567     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5568   } else if (reuse==MAT_REUSE_MATRIX) {
5569     /* If the matrix was already created, we simply update the values using the SF objects
5570      * that were attached to the matrix earlier.
5571      *  */
5572     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5573     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5574     if (!sf || !osf) {
5575       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5576     }
5577     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5578     /* Update values in place */
5579     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5580     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5581     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5582     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5583   } else {
5584     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5585   }
5586   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5587   PetscFunctionReturn(0);
5588 }
5589 
5590 /*@C
5591     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5592 
5593     Collective on Mat
5594 
5595    Input Parameters:
5596 +    A,B - the matrices in mpiaij format
5597 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5598 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5599 
5600    Output Parameter:
5601 +    rowb, colb - index sets of rows and columns of B to extract
5602 -    B_seq - the sequential matrix generated
5603 
5604     Level: developer
5605 
5606 @*/
5607 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5608 {
5609   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5610   PetscErrorCode ierr;
5611   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5612   IS             isrowb,iscolb;
5613   Mat            *bseq=NULL;
5614 
5615   PetscFunctionBegin;
5616   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5617     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5618   }
5619   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5620 
5621   if (scall == MAT_INITIAL_MATRIX) {
5622     start = A->cmap->rstart;
5623     cmap  = a->garray;
5624     nzA   = a->A->cmap->n;
5625     nzB   = a->B->cmap->n;
5626     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5627     ncols = 0;
5628     for (i=0; i<nzB; i++) {  /* row < local row index */
5629       if (cmap[i] < start) idx[ncols++] = cmap[i];
5630       else break;
5631     }
5632     imark = i;
5633     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5634     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5635     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5636     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5637   } else {
5638     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5639     isrowb  = *rowb; iscolb = *colb;
5640     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5641     bseq[0] = *B_seq;
5642   }
5643   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5644   *B_seq = bseq[0];
5645   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5646   if (!rowb) {
5647     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5648   } else {
5649     *rowb = isrowb;
5650   }
5651   if (!colb) {
5652     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5653   } else {
5654     *colb = iscolb;
5655   }
5656   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5657   PetscFunctionReturn(0);
5658 }
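/* Editorial usage sketch (not part of the original source): fetch the rows of B needed by this
   rank's columns of A, keeping the index sets so a later call can reuse the same pattern.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... B's values change, same nonzero structure ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/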
5659 
5660 /*
5661     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5662     of the OFF-DIAGONAL portion of local A
5663 
5664     Collective on Mat
5665 
5666    Input Parameters:
5667 +    A,B - the matrices in mpiaij format
5668 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5669 
5670    Output Parameter:
5671 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5672 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5673 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5674 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5675 
5676     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5677      for this matrix. This is not desirable.
5678 
5679     Level: developer
5680 
5681 */
5682 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5683 {
5684   PetscErrorCode         ierr;
5685   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5686   Mat_SeqAIJ             *b_oth;
5687   VecScatter             ctx;
5688   MPI_Comm               comm;
5689   const PetscMPIInt      *rprocs,*sprocs;
5690   const PetscInt         *srow,*rstarts,*sstarts;
5691   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5692   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5693   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5694   MPI_Request            *rwaits = NULL,*swaits = NULL;
5695   MPI_Status             rstatus;
5696   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5697 
5698   PetscFunctionBegin;
5699   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5700   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5701 
5702   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5703     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5704   }
5705   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5706   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5707 
5708   if (size == 1) {
5709     startsj_s = NULL;
5710     bufa_ptr  = NULL;
5711     *B_oth    = NULL;
5712     PetscFunctionReturn(0);
5713   }
5714 
5715   ctx = a->Mvctx;
5716   tag = ((PetscObject)ctx)->tag;
5717 
5718   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5719   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5720   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5721   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5722   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5723   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5724   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5725 
5726   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5727   if (scall == MAT_INITIAL_MATRIX) {
5728     /* i-array */
5729     /*---------*/
5730     /*  post receives */
5731     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5732     for (i=0; i<nrecvs; i++) {
5733       rowlen = rvalues + rstarts[i]*rbs;
5734       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5735       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5736     }
5737 
5738     /* pack the outgoing message */
5739     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5740 
5741     sstartsj[0] = 0;
5742     rstartsj[0] = 0;
5743     len         = 0; /* total length of j or a array to be sent */
5744     if (nsends) {
5745       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5746       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5747     }
5748     for (i=0; i<nsends; i++) {
5749       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5750       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5751       for (j=0; j<nrows; j++) {
5752         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5753         for (l=0; l<sbs; l++) {
5754           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5755 
5756           rowlen[j*sbs+l] = ncols;
5757 
5758           len += ncols;
5759           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5760         }
5761         k++;
5762       }
5763       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5764 
5765       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5766     }
5767     /* recvs and sends of i-array are completed */
5768     i = nrecvs;
5769     while (i--) {
5770       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5771     }
5772     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5773     ierr = PetscFree(svalues);CHKERRQ(ierr);
5774 
5775     /* allocate buffers for sending j and a arrays */
5776     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5777     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5778 
5779     /* create i-array of B_oth */
5780     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5781 
5782     b_othi[0] = 0;
5783     len       = 0; /* total length of j or a array to be received */
5784     k         = 0;
5785     for (i=0; i<nrecvs; i++) {
5786       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5787       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5788       for (j=0; j<nrows; j++) {
5789         b_othi[k+1] = b_othi[k] + rowlen[j];
5790         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5791         k++;
5792       }
5793       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5794     }
5795     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5796 
5797     /* allocate space for the j and a arrays of B_oth */
5798     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5799     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5800 
5801     /* j-array */
5802     /*---------*/
5803     /*  post receives of j-array */
5804     for (i=0; i<nrecvs; i++) {
5805       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5806       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5807     }
5808 
5809     /* pack the outgoing message j-array */
5810     if (nsends) k = sstarts[0];
5811     for (i=0; i<nsends; i++) {
5812       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5813       bufJ  = bufj+sstartsj[i];
5814       for (j=0; j<nrows; j++) {
5815         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5816         for (ll=0; ll<sbs; ll++) {
5817           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5818           for (l=0; l<ncols; l++) {
5819             *bufJ++ = cols[l];
5820           }
5821           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5822         }
5823       }
5824       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5825     }
5826 
5827     /* recvs and sends of j-array are completed */
5828     i = nrecvs;
5829     while (i--) {
5830       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5831     }
5832     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5833   } else if (scall == MAT_REUSE_MATRIX) {
5834     sstartsj = *startsj_s;
5835     rstartsj = *startsj_r;
5836     bufa     = *bufa_ptr;
5837     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5838     b_otha   = b_oth->a;
5839   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5840 
5841   /* a-array */
5842   /*---------*/
5843   /*  post receives of a-array */
5844   for (i=0; i<nrecvs; i++) {
5845     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5846     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5847   }
5848 
5849   /* pack the outgoing message a-array */
5850   if (nsends) k = sstarts[0];
5851   for (i=0; i<nsends; i++) {
5852     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5853     bufA  = bufa+sstartsj[i];
5854     for (j=0; j<nrows; j++) {
5855       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5856       for (ll=0; ll<sbs; ll++) {
5857         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5858         for (l=0; l<ncols; l++) {
5859           *bufA++ = vals[l];
5860         }
5861         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5862       }
5863     }
5864     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5865   }
5866   /* recvs and sends of a-array are completed */
5867   i = nrecvs;
5868   while (i--) {
5869     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5870   }
5871   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5872   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5873 
5874   if (scall == MAT_INITIAL_MATRIX) {
5875     /* put together the new matrix */
5876     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5877 
5878     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5879     /* Since these are PETSc arrays, change flags to free them as necessary. */
5880     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5881     b_oth->free_a  = PETSC_TRUE;
5882     b_oth->free_ij = PETSC_TRUE;
5883     b_oth->nonew   = 0;
5884 
5885     ierr = PetscFree(bufj);CHKERRQ(ierr);
5886     if (!startsj_s || !bufa_ptr) {
5887       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5888       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5889     } else {
5890       *startsj_s = sstartsj;
5891       *startsj_r = rstartsj;
5892       *bufa_ptr  = bufa;
5893     }
5894   }
5895 
5896   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5897   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5898   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5899   PetscFunctionReturn(0);
5900 }
5901 
5902 /*@C
5903   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5904 
5905   Not Collective
5906 
5907   Input Parameters:
5908 . A - The matrix in mpiaij format
5909 
5910   Output Parameter:
5911 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5912 . colmap - A map from global column index to local index into lvec
5913 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5914 
5915   Level: developer
5916 
5917 @*/
5918 #if defined(PETSC_USE_CTABLE)
5919 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5920 #else
5921 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5922 #endif
5923 {
5924   Mat_MPIAIJ *a;
5925 
5926   PetscFunctionBegin;
5927   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5928   PetscValidPointer(lvec, 2);
5929   PetscValidPointer(colmap, 3);
5930   PetscValidPointer(multScatter, 4);
5931   a = (Mat_MPIAIJ*) A->data;
5932   if (lvec) *lvec = a->lvec;
5933   if (colmap) *colmap = a->colmap;
5934   if (multScatter) *multScatter = a->Mvctx;
5935   PetscFunctionReturn(0);
5936 }
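/* Editorial usage sketch (not part of the original source): the returned objects are owned by the
   matrix and must not be destroyed by the caller; lvec and the scatter can be used to replay the
   halo exchange that MatMult() performs, e.g.

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/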
5937 
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5941 #if defined(PETSC_HAVE_MKL_SPARSE)
5942 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5943 #endif
5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5945 #if defined(PETSC_HAVE_ELEMENTAL)
5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5947 #endif
5948 #if defined(PETSC_HAVE_HYPRE)
5949 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5950 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5951 #endif
5952 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5954 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5955 
5956 /*
5957     Computes C = A*B as (B'*A')' since computing the product directly is untenable
5958 
5959                n                       p                          p
5960         (              )       (              )         (                  )
5961       m (      A       )  *  n (       B      )   =   m (         C        )
5962         (              )       (              )         (                  )
5963 
5964 */
5965 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5966 {
5967   PetscErrorCode ierr;
5968   Mat            At,Bt,Ct;
5969 
5970   PetscFunctionBegin;
5971   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5972   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5973   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5974   ierr = MatDestroy(&At);CHKERRQ(ierr);
5975   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5976   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5977   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5978   PetscFunctionReturn(0);
5979 }
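/* Editorial note: the transformation above relies on the identity C = A*B = (B^T * A^T)^T, so the
   dense(A) * AIJ(B) product is reduced to the existing AIJ * dense kernel applied to the transposes.
   A typical call that ends up here is simply

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   with A of type MATMPIDENSE and B of type MATMPIAIJ.
*/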
5980 
5981 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5982 {
5983   PetscErrorCode ierr;
5984   PetscInt       m=A->rmap->n,n=B->cmap->n;
5985   Mat            Cmat;
5986 
5987   PetscFunctionBegin;
5988   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5989   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5990   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5991   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5992   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5993   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5994   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5995   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5996 
5997   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5998 
5999   *C = Cmat;
6000   PetscFunctionReturn(0);
6001 }
6002 
6003 /* ----------------------------------------------------------------*/
6004 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
6005 {
6006   PetscErrorCode ierr;
6007 
6008   PetscFunctionBegin;
6009   if (scall == MAT_INITIAL_MATRIX) {
6010     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
6011     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
6012     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
6013   }
6014   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
6015   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
6016   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
6017   PetscFunctionReturn(0);
6018 }
6019 
6020 /*MC
6021    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6022 
6023    Options Database Keys:
6024 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6025 
6026    Level: beginner
6027 
6028    Notes:
6029     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6030     in this case the values associated with the rows and columns one passes in are set to zero
6031     in the matrix
6032 
6033     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6034     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6035 
6036 .seealso: MatCreateAIJ()
6037 M*/
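/* Editorial creation sketch (not part of the original source): a common way to obtain this type
   explicitly; the global sizes M, N and the preallocation counts are placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ...
*/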
6038 
6039 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6040 {
6041   Mat_MPIAIJ     *b;
6042   PetscErrorCode ierr;
6043   PetscMPIInt    size;
6044 
6045   PetscFunctionBegin;
6046   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6047 
6048   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6049   B->data       = (void*)b;
6050   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6051   B->assembled  = PETSC_FALSE;
6052   B->insertmode = NOT_SET_VALUES;
6053   b->size       = size;
6054 
6055   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6056 
6057   /* build cache for off array entries formed */
6058   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6059 
6060   b->donotstash  = PETSC_FALSE;
6061   b->colmap      = 0;
6062   b->garray      = 0;
6063   b->roworiented = PETSC_TRUE;
6064 
6065   /* stuff used for matrix vector multiply */
6066   b->lvec  = NULL;
6067   b->Mvctx = NULL;
6068 
6069   /* stuff for MatGetRow() */
6070   b->rowindices   = 0;
6071   b->rowvalues    = 0;
6072   b->getrowactive = PETSC_FALSE;
6073 
6074   /* flexible pointer used in CUSP/CUSPARSE classes */
6075   b->spptr = NULL;
6076 
6077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6087 #if defined(PETSC_HAVE_MKL_SPARSE)
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6089 #endif
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6092 #if defined(PETSC_HAVE_ELEMENTAL)
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6094 #endif
6095 #if defined(PETSC_HAVE_HYPRE)
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6097 #endif
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6103 #if defined(PETSC_HAVE_HYPRE)
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6105 #endif
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6107   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6108   PetscFunctionReturn(0);
6109 }
6110 
6111 /*@C
6112      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6113          and "off-diagonal" part of the matrix in CSR format.
6114 
6115    Collective
6116 
6117    Input Parameters:
6118 +  comm - MPI communicator
6119 .  m - number of local rows (Cannot be PETSC_DECIDE)
6120 .  n - This value should be the same as the local size used in creating the
6121        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
6122        calculated if N is given) For square matrices n is almost always m.
6123 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6124 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6125 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6126 .   j - column indices
6127 .   a - matrix values
6128 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6129 .   oj - column indices
6130 -   oa - matrix values
6131 
6132    Output Parameter:
6133 .   mat - the matrix
6134 
6135    Level: advanced
6136 
6137    Notes:
6138        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6139        must free the arrays once the matrix has been destroyed and not before.
6140 
6141        The i and j indices are 0 based
6142 
6143        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6144 
6145        This sets local rows and cannot be used to set off-processor values.
6146 
6147        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6148        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6149        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6150        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6151        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6152        communication if it is known that only local entries will be set.
6153 
6154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6155           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6156 @*/
6157 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6158 {
6159   PetscErrorCode ierr;
6160   Mat_MPIAIJ     *maij;
6161 
6162   PetscFunctionBegin;
6163   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6164   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6165   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6166   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6167   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6168   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6169   maij = (Mat_MPIAIJ*) (*mat)->data;
6170 
6171   (*mat)->preallocated = PETSC_TRUE;
6172 
6173   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6174   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6175 
6176   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6177   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6178 
6179   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6180   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6181   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6182   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6183 
6184   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6185   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6186   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6187   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6188   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6189   PetscFunctionReturn(0);
6190 }
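/* Editorial usage sketch (not part of the original source): the caller owns all six CSR arrays and
   must keep them alive until the matrix is destroyed; the contents below are placeholders for a
   matrix with m local rows.

     PetscInt    i[m+1],j[...];           diagonal block
     PetscScalar a[...];
     PetscInt    oi[m+1],oj[...];         off-diagonal block
     PetscScalar oa[...];
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/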
6191 
6192 /*
6193     Special version for direct calls from Fortran
6194 */
6195 #include <petsc/private/fortranimpl.h>
6196 
6197 /* Change these macros so can be used in void function */
6198 #undef CHKERRQ
6199 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6200 #undef SETERRQ2
6201 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6202 #undef SETERRQ3
6203 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6204 #undef SETERRQ
6205 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6206 
6207 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6208 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6209 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6210 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6211 #else
6212 #endif
6213 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6214 {
6215   Mat            mat  = *mmat;
6216   PetscInt       m    = *mm, n = *mn;
6217   InsertMode     addv = *maddv;
6218   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6219   PetscScalar    value;
6220   PetscErrorCode ierr;
6221 
6222   MatCheckPreallocated(mat,1);
6223   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6224 
6225 #if defined(PETSC_USE_DEBUG)
6226   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6227 #endif
6228   {
6229     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6230     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6231     PetscBool roworiented = aij->roworiented;
6232 
6233     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
6234     Mat        A                    = aij->A;
6235     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6236     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6237     MatScalar  *aa                  = a->a;
6238     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6239     Mat        B                    = aij->B;
6240     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6241     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6242     MatScalar  *ba                  = b->a;
6243     /* The variable below is only needed in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6244      * cannot use "#if defined" inside a macro. */
6245     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6246 
6247     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6248     PetscInt  nonew = a->nonew;
6249     MatScalar *ap1,*ap2;
6250 
6251     PetscFunctionBegin;
6252     for (i=0; i<m; i++) {
6253       if (im[i] < 0) continue;
6254 #if defined(PETSC_USE_DEBUG)
6255       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6256 #endif
6257       if (im[i] >= rstart && im[i] < rend) {
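        /* the row is owned by this process: set up search/insertion pointers into this row of the
           diagonal block A (rp1/ap1) and of the off-diagonal block B (rp2/ap2) */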
6258         row      = im[i] - rstart;
6259         lastcol1 = -1;
6260         rp1      = aj + ai[row];
6261         ap1      = aa + ai[row];
6262         rmax1    = aimax[row];
6263         nrow1    = ailen[row];
6264         low1     = 0;
6265         high1    = nrow1;
6266         lastcol2 = -1;
6267         rp2      = bj + bi[row];
6268         ap2      = ba + bi[row];
6269         rmax2    = bimax[row];
6270         nrow2    = bilen[row];
6271         low2     = 0;
6272         high2    = nrow2;
6273 
6274         for (j=0; j<n; j++) {
6275           if (roworiented) value = v[i*n+j];
6276           else value = v[i+j*m];
6277           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
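          /* columns in [cstart,cend) belong to the diagonal block A (stored with local column indices);
             other valid global columns belong to the off-diagonal block B */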
6278           if (in[j] >= cstart && in[j] < cend) {
6279             col = in[j] - cstart;
6280             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6281 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6282             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6283 #endif
6284           } else if (in[j] < 0) continue;
6285 #if defined(PETSC_USE_DEBUG)
6286           /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the 'else' clause that follows */
6287           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6288 #endif
6289           else {
6290             if (mat->was_assembled) {
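              /* once the matrix has been assembled, B stores compacted local column indices, so the global
                 column must be translated through colmap; a miss (col < 0) when new nonzeros are allowed
                 triggers MatDisAssemble_MPIAIJ(), which switches B back to global column indices */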
6291               if (!aij->colmap) {
6292                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6293               }
6294 #if defined(PETSC_USE_CTABLE)
6295               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6296               col--;
6297 #else
6298               col = aij->colmap[in[j]] - 1;
6299 #endif
6300               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6301                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6302                 col  =  in[j];
6303                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6304                 B        = aij->B;
6305                 b        = (Mat_SeqAIJ*)B->data;
6306                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6307                 rp2      = bj + bi[row];
6308                 ap2      = ba + bi[row];
6309                 rmax2    = bimax[row];
6310                 nrow2    = bilen[row];
6311                 low2     = 0;
6312                 high2    = nrow2;
6313                 bm       = aij->B->rmap->n;
6314                 ba       = b->a;
6315                 inserted = PETSC_FALSE;
6316               }
6317             } else col = in[j];
6318             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6319 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6320             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6321 #endif
6322           }
6323         }
6324       } else if (!aij->donotstash) {
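        /* the row is owned by another process: stash the values; they are communicated to the owning
           process during MatAssemblyBegin()/MatAssemblyEnd() */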
6325         if (roworiented) {
6326           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6327         } else {
6328           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6329         }
6330       }
6331     }
6332   }
6333   PetscFunctionReturnVoid();
6334 }
6335