xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 92cfd99a522dd5f5e132443db7e958e78e49e2da)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
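   Example Usage (a minimal sketch; M, N, and the per-row nonzero estimates are illustrative placeholders, not prescriptions):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);          used when comm has a single process
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   used when comm has more than one process
.ve
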
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
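  /* accumulate each entry's contribution into a work array indexed by GLOBAL column, then
     combine across processes with a max (NORM_INFINITY) or sum (NORM_1/NORM_2) reduction;
     note the O(N) work array per process, so this is not scalable in the number of columns */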
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
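/*
   A minimal usage sketch (hypothetical: gmat is a MATSEQAIJ matrix living on rank 0 of comm,
   and m is the number of rows wanted on this process):

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);    the reuse call moves over only the numerical values
*/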
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 has an order N integer array) but is fast to access.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
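/*
   Sketch of how the colmap built above is consumed later in this file (gcol is a hypothetical
   global column index; the +1/-1 shift lets 0 mean "not present" in both variants):

   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;                          col is now the local off-diagonal index, or -1 if gcol is absent
   #else
      col = aij->colmap[gcol] - 1;
   #endif
*/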
447 
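/*
   The two macros below implement insertion into one row of a SeqAIJ block: a short binary
   search narrows [low,high) down to at most 5 entries, a linear scan finishes the lookup,
   and on a miss the later entries of the row are shifted up to make room (reallocating via
   MatSeqXAIJReallocateAIJ if the row is full).
*/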
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           goto a_noinsert; \
468         } \
469       }  \
470       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
471       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
472       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
473       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
474       N = nrow1++ - 1; a->nz++; high1++; \
475       /* shift up all the later entries in this row */ \
476       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
477       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
478       rp1[_i] = col;  \
479       ap1[_i] = value;  \
480       A->nonzerostate++;\
481       a_noinsert: ; \
482       ailen[row] = nrow1; \
483 }
484 
485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
486   { \
487     if (col <= lastcol2) low2 = 0;                        \
488     else high2 = nrow2;                                   \
489     lastcol2 = col;                                       \
490     while (high2-low2 > 5) {                              \
491       t = (low2+high2)/2;                                 \
492       if (rp2[t] > col) high2 = t;                        \
493       else             low2  = t;                         \
494     }                                                     \
495     for (_i=low2; _i<high2; _i++) {                       \
496       if (rp2[_i] > col) break;                           \
497       if (rp2[_i] == col) {                               \
498         if (addv == ADD_VALUES) {                         \
499           ap2[_i] += value;                               \
500           (void)PetscLogFlops(1.0);                       \
501         }                                                 \
502         else                    ap2[_i] = value;          \
503         goto b_noinsert;                                  \
504       }                                                   \
505     }                                                     \
506     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
507     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
508     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
509     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
510     N = nrow2++ - 1; b->nz++; high2++;                    \
511     /* shift up all the later entries in this row */      \
512     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
513     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
514     rp2[_i] = col;                                        \
515     ap2[_i] = value;                                      \
516     B->nonzerostate++;                                    \
517     b_noinsert: ;                                         \
518     bilen[row] = nrow2;                                   \
519   }
520 
521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
522 {
523   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
524   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
525   PetscErrorCode ierr;
526   PetscInt       l,*garray = mat->garray,diag;
527 
528   PetscFunctionBegin;
529   /* code only works for square matrices A */
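  /* v is assumed to contain the whole row in global column order: the off-diagonal (B) entries
     to the left of the diagonal block, then the diagonal (A) block, then the off-diagonal
     entries to the right; the three array copies below unpack it in that order */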
530 
531   /* find size of row to the left of the diagonal part */
532   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
533   row  = row - diag;
534   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
535     if (garray[b->j[b->i[row]+l]] > diag) break;
536   }
537   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
538 
539   /* diagonal part */
540   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
541 
542   /* right of diagonal part */
543   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
548 {
549   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
550   PetscScalar    value = 0.0;
551   PetscErrorCode ierr;
552   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
554   PetscBool      roworiented = aij->roworiented;
555 
556   /* Some Variables required in the macro */
557   Mat        A                 = aij->A;
558   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
559   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
560   MatScalar  *aa               = a->a;
561   PetscBool  ignorezeroentries = a->ignorezeroentries;
562   Mat        B                 = aij->B;
563   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
564   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
565   MatScalar  *ba               = b->a;
566 
567   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
568   PetscInt  nonew;
569   MatScalar *ap1,*ap2;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (im[i] < 0) continue;
574 #if defined(PETSC_USE_DEBUG)
575     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
576 #endif
577     if (im[i] >= rstart && im[i] < rend) {
578       row      = im[i] - rstart;
579       lastcol1 = -1;
580       rp1      = aj + ai[row];
581       ap1      = aa + ai[row];
582       rmax1    = aimax[row];
583       nrow1    = ailen[row];
584       low1     = 0;
585       high1    = nrow1;
586       lastcol2 = -1;
587       rp2      = bj + bi[row];
588       ap2      = ba + bi[row];
589       rmax2    = bimax[row];
590       nrow2    = bilen[row];
591       low2     = 0;
592       high2    = nrow2;
593 
594       for (j=0; j<n; j++) {
595         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
596         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
597         if (in[j] >= cstart && in[j] < cend) {
598           col   = in[j] - cstart;
599           nonew = a->nonew;
600           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
601         } else if (in[j] < 0) continue;
602 #if defined(PETSC_USE_DEBUG)
603         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
604 #endif
605         else {
606           if (mat->was_assembled) {
607             if (!aij->colmap) {
608               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609             }
610 #if defined(PETSC_USE_CTABLE)
611             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
612             col--;
613 #else
614             col = aij->colmap[in[j]] - 1;
615 #endif
616             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
617               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
618               col  =  in[j];
619               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
620               B     = aij->B;
621               b     = (Mat_SeqAIJ*)B->data;
622               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
623               rp2   = bj + bi[row];
624               ap2   = ba + bi[row];
625               rmax2 = bimax[row];
626               nrow2 = bilen[row];
627               low2  = 0;
628               high2 = nrow2;
629               bm    = aij->B->rmap->n;
630               ba    = b->a;
631             } else if (col < 0) {
632               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
633                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
634               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
635             }
636           } else col = in[j];
637           nonew = b->nonew;
638           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
639         }
640       }
641     } else {
642       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
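      /* off-process entries are buffered in the matrix stash here and communicated to their
         owning processes later, in MatAssemblyBegin_MPIAIJ()/MatAssemblyEnd_MPIAIJ() */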
643       if (!aij->donotstash) {
644         mat->assembled = PETSC_FALSE;
645         if (roworiented) {
646           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
647         } else {
648           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
649         }
650       }
651     }
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
660 */
661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
662 {
663   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
664   Mat            A           = aij->A; /* diagonal part of the matrix */
665   Mat            B           = aij->B; /* offdiagonal part of the matrix */
666   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
667   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
668   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
669   PetscInt       *ailen      = a->ilen,*aj = a->j;
670   PetscInt       *bilen      = b->ilen,*bj = b->j;
671   PetscInt       am          = aij->A->rmap->n,j;
672   PetscInt       diag_so_far = 0,dnz;
673   PetscInt       offd_so_far = 0,onz;
674 
675   PetscFunctionBegin;
676   /* Iterate over all rows of the matrix */
677   for (j=0; j<am; j++) {
678     dnz = onz = 0;
679     /*  Iterate over all non-zero columns of the current row */
680     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
681       /* If column is in the diagonal */
682       if (mat_j[col] >= cstart && mat_j[col] < cend) {
683         aj[diag_so_far++] = mat_j[col] - cstart;
684         dnz++;
685       } else { /* off-diagonal entries */
686         bj[offd_so_far++] = mat_j[col];
687         onz++;
688       }
689     }
690     ailen[j] = dnz;
691     bilen[j] = onz;
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 /*
697     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
698     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
699     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
700     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
701     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
702 */
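/*
   A small worked example of the split performed below (hypothetical numbers): with
   cstart = 4 and cend = 8, a local row with global columns {1, 5, 7, 9} contributes the
   shifted columns {1, 3} to aj/aa (dnz_row = 2) and the untranslated {1, 9} to bj/ba
   (onz_row = 2).
*/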
703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
704 {
705   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
706   Mat            A      = aij->A; /* diagonal part of the matrix */
707   Mat            B      = aij->B; /* offdiagonal part of the matrix */
708   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
709   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
710   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
711   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
712   PetscInt       *ailen = a->ilen,*aj = a->j;
713   PetscInt       *bilen = b->ilen,*bj = b->j;
714   PetscInt       am     = aij->A->rmap->n,j;
715   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
716   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
717   PetscScalar    *aa = a->a,*ba = b->a;
718 
719   PetscFunctionBegin;
720   /* Iterate over all rows of the matrix */
721   for (j=0; j<am; j++) {
722     dnz_row = onz_row = 0;
723     rowstart_offd = full_offd_i[j];
724     rowstart_diag = full_diag_i[j];
725     /*  Iterate over all non-zero columns of the current row */
726     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
727       /* If column is in the diagonal */
728       if (mat_j[col] >= cstart && mat_j[col] < cend) {
729         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
730         aa[rowstart_diag+dnz_row] = mat_a[col];
731         dnz_row++;
732       } else { /* off-diagonal entries */
733         bj[rowstart_offd+onz_row] = mat_j[col];
734         ba[rowstart_offd+onz_row] = mat_a[col];
735         onz_row++;
736       }
737     }
738     ailen[j] = dnz_row;
739     bilen[j] = onz_row;
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
745 {
746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
749   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
750 
751   PetscFunctionBegin;
752   for (i=0; i<m; i++) {
753     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
754     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
755     if (idxm[i] >= rstart && idxm[i] < rend) {
756       row = idxm[i] - rstart;
757       for (j=0; j<n; j++) {
758         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
759         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
760         if (idxn[j] >= cstart && idxn[j] < cend) {
761           col  = idxn[j] - cstart;
762           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
763         } else {
764           if (!aij->colmap) {
765             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
766           }
767 #if defined(PETSC_USE_CTABLE)
768           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
769           col--;
770 #else
771           col = aij->colmap[idxn[j]] - 1;
772 #endif
773           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
774           else {
775             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776           }
777         }
778       }
779     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
785 
786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
787 {
788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
789   PetscErrorCode ierr;
790   PetscInt       nstash,reallocs;
791 
792   PetscFunctionBegin;
793   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
794 
795   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
796   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
797   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
805   PetscErrorCode ierr;
806   PetscMPIInt    n;
807   PetscInt       i,j,rstart,ncols,flg;
808   PetscInt       *row,*col;
809   PetscBool      other_disassembled;
810   PetscScalar    *val;
811 
812   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
813 
814   PetscFunctionBegin;
815   if (!aij->donotstash && !mat->nooffprocentries) {
816     while (1) {
817       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
818       if (!flg) break;
819 
820       for (i=0; i<n; ) {
821         /* Now identify the consecutive vals belonging to the same row */
822         for (j=i,rstart=row[j]; j<n; j++) {
823           if (row[j] != rstart) break;
824         }
825         if (j < n) ncols = j-i;
826         else       ncols = n-i;
827         /* Now assemble all these values with a single function call */
828         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
829 
830         i = j;
831       }
832     }
833     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
834   }
835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
836   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
837 #endif
838   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
839   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
840 
841   /* determine whether any processor has disassembled; if so we must
842      also disassemble ourselves, so that we may reassemble. */
843   /*
844      if nonzero structure of submatrix B cannot change then we know that
845      no processor disassembled thus we can skip this stuff
846   */
847   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
848     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
849     if (mat->was_assembled && !other_disassembled) {
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
852 #endif
853       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
854     }
855   }
856   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
857     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
858   }
859   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
861   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
862 #endif
863   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
864   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
865 
866   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
867 
868   aij->rowvalues = 0;
869 
870   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
871   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
872 
873   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
874   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
875     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
876     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
877   }
878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
879   mat->offloadmask = PETSC_OFFLOAD_BOTH;
880 #endif
881   PetscFunctionReturn(0);
882 }
883 
884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
885 {
886   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
891   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
896 {
897   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
898   PetscObjectState sA, sB;
899   PetscInt        *lrows;
900   PetscInt         r, len;
901   PetscBool        cong, lch, gch;
902   PetscErrorCode   ierr;
903 
904   PetscFunctionBegin;
905   /* get locally owned rows */
906   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
907   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
908   /* fix right hand side if needed */
909   if (x && b) {
910     const PetscScalar *xx;
911     PetscScalar       *bb;
912 
913     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
914     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
916     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
917     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
918     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
919   }
920 
921   sA = mat->A->nonzerostate;
922   sB = mat->B->nonzerostate;
923 
924   if (diag != 0.0 && cong) {
925     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
926     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
927   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
928     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
929     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
930     PetscInt   nnwA, nnwB;
931     PetscBool  nnzA, nnzB;
932 
933     nnwA = aijA->nonew;
934     nnwB = aijB->nonew;
935     nnzA = aijA->keepnonzeropattern;
936     nnzB = aijB->keepnonzeropattern;
937     if (!nnzA) {
938       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
939       aijA->nonew = 0;
940     }
941     if (!nnzB) {
942       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
943       aijB->nonew = 0;
944     }
945     /* Must zero here before the next loop */
946     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
947     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
948     for (r = 0; r < len; ++r) {
949       const PetscInt row = lrows[r] + A->rmap->rstart;
950       if (row >= A->cmap->N) continue;
951       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
952     }
953     aijA->nonew = nnwA;
954     aijB->nonew = nnwB;
955   } else {
956     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
957     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958   }
959   ierr = PetscFree(lrows);CHKERRQ(ierr);
960   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962 
963   /* reduce nonzerostate */
964   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
965   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
966   if (gch) A->nonzerostate++;
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
971 {
972   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode    ierr;
974   PetscMPIInt       n = A->rmap->n;
975   PetscInt          i,j,r,m,len = 0;
976   PetscInt          *lrows,*owners = A->rmap->range;
977   PetscMPIInt       p = 0;
978   PetscSFNode       *rrows;
979   PetscSF           sf;
980   const PetscScalar *xx;
981   PetscScalar       *bb,*mask;
982   Vec               xmask,lmask;
983   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
984   const PetscInt    *aj, *ii,*ridx;
985   PetscScalar       *aa;
986 
987   PetscFunctionBegin;
988   /* Create SF where leaves are input rows and roots are owned rows */
989   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
990   for (r = 0; r < n; ++r) lrows[r] = -1;
991   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
992   for (r = 0; r < N; ++r) {
993     const PetscInt idx   = rows[r];
994     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
995     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
996       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
997     }
998     rrows[r].rank  = p;
999     rrows[r].index = rows[r] - owners[p];
1000   }
1001   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1002   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1003   /* Collect flags for rows to be zeroed */
1004   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1005   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1006   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1007   /* Compress and put in row numbers */
1008   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1009   /* zero diagonal part of matrix */
1010   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1011   /* handle off diagonal part of matrix */
1012   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1013   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1014   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1015   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1016   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1017   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1020   if (x && b) { /* this code is buggy when the row and column layout don't match */
1021     PetscBool cong;
1022 
1023     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1024     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1025     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1028     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1029   }
1030   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1031   /* remove zeroed rows of off diagonal matrix */
1032   ii = aij->i;
1033   for (i=0; i<len; i++) {
1034     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1035   }
1036   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1037   if (aij->compressedrow.use) {
1038     m    = aij->compressedrow.nrows;
1039     ii   = aij->compressedrow.i;
1040     ridx = aij->compressedrow.rindex;
1041     for (i=0; i<m; i++) {
1042       n  = ii[i+1] - ii[i];
1043       aj = aij->j + ii[i];
1044       aa = aij->a + ii[i];
1045 
1046       for (j=0; j<n; j++) {
1047         if (PetscAbsScalar(mask[*aj])) {
1048           if (b) bb[*ridx] -= *aa*xx[*aj];
1049           *aa = 0.0;
1050         }
1051         aa++;
1052         aj++;
1053       }
1054       ridx++;
1055     }
1056   } else { /* do not use compressed row format */
1057     m = l->B->rmap->n;
1058     for (i=0; i<m; i++) {
1059       n  = ii[i+1] - ii[i];
1060       aj = aij->j + ii[i];
1061       aa = aij->a + ii[i];
1062       for (j=0; j<n; j++) {
1063         if (PetscAbsScalar(mask[*aj])) {
1064           if (b) bb[i] -= *aa*xx[*aj];
1065           *aa = 0.0;
1066         }
1067         aa++;
1068         aj++;
1069       }
1070     }
1071   }
1072   if (x && b) {
1073     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1074     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1075   }
1076   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1077   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1078   ierr = PetscFree(lrows);CHKERRQ(ierr);
1079 
1080   /* only change matrix nonzero state if pattern was allowed to be changed */
1081   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1082     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1083     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       nt;
1093   VecScatter     Mvctx = a->Mvctx;
1094 
1095   PetscFunctionBegin;
1096   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1097   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1098 
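  /* overlap communication with computation: start gathering the off-process entries of xx,
     apply the local diagonal block while the messages are in flight, then complete the
     scatter and add in the off-diagonal block's contribution */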
1099   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1100   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1101   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1102   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1107 {
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   VecScatter     Mvctx = a->Mvctx;
1121 
1122   PetscFunctionBegin;
1123   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1124   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1125   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1126   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   /* do nondiagonal part */
1138   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1139   /* do local part */
1140   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141   /* add partial results together */
1142   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1148 {
1149   MPI_Comm       comm;
1150   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1151   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1152   IS             Me,Notme;
1153   PetscErrorCode ierr;
1154   PetscInt       M,N,first,last,*notme,i;
1155   PetscBool      lf;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159   /* Easy test: symmetric diagonal block */
1160   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1161   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1162   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1190 {
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1199 {
1200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   /* do nondiagonal part */
1205   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* add partial results together */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 /*
1215   This only works correctly for square matrices where the subblock A->A is the
1216    diagonal block
1217 */
1218 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1219 {
1220   PetscErrorCode ierr;
1221   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1222 
1223   PetscFunctionBegin;
1224   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1225   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1226   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1231 {
1232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1237   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1242 {
1243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247 #if defined(PETSC_USE_LOG)
1248   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1249 #endif
1250   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1251   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1252   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_CTABLE)
1255   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1256 #else
1257   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1258 #endif
1259   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1261   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1262   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1277 #if defined(PETSC_HAVE_ELEMENTAL)
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1279 #endif
1280 #if defined(PETSC_HAVE_HYPRE)
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1283 #endif
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
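/*
   Sketch of the binary file layout produced by MatView_MPIAIJ_Binary() below,
   reconstructed from the writes in the code (MatLoad() is the authoritative reader):

     header[4]      = {MAT_FILE_CLASSID, M, N, global nonzero count}   (PETSC_INT)
     row lengths    = number of nonzeros in each of the M rows         (PETSC_INT)
     column indices = global column of every nonzero, row by row       (PETSC_INT)
     values         = numerical value of every nonzero                 (PETSC_SCALAR)

   Process 0 performs all the writes; the other processes send their pieces to it
   under PetscViewerFlowControl so only a bounded number of messages is in flight.
   A "-matload_block_size" hint is also written to the companion .info file.
*/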
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode ierr;
1295   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1296   int            fd;
1297   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1299   PetscScalar    *column_values;
1300   PetscInt       message_count,flowcontrolcount;
1301   FILE           *file;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312 
1313     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen  = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1327 
1328   /* store the row lengths to the file */
1329   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1330   if (!rank) {
1331     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       rlen = range[i+1] - range[i];
1335       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     }
1338     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1339   } else {
1340     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1341     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1343   }
1344   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1345 
1346   /* load up the local column indices */
1347   nzmax = nz; /* process 0 needs as much space as the largest process needs; elsewhere the local nz suffices */
1348   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1349   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1350   cnt   = 0;
1351   for (i=0; i<mat->rmap->n; i++) {
1352     for (j=B->i[i]; j<B->i[i+1]; j++) {
1353       if ((col = garray[B->j[j]]) > cstart) break;
1354       column_indices[cnt++] = col;
1355     }
1356     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1357     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1358   }
1359   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1360 
1361   /* store the column indices to the file */
1362   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1363   if (!rank) {
1364     MPI_Status status;
1365     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1366     for (i=1; i<size; i++) {
1367       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1368       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1369       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1370       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1371       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     }
1373     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1374   } else {
1375     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1376     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1379   }
1380   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1381 
1382   /* load up the local column values */
1383   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1384   cnt  = 0;
1385   for (i=0; i<mat->rmap->n; i++) {
1386     for (j=B->i[i]; j<B->i[i+1]; j++) {
1387       if (garray[B->j[j]] > cstart) break;
1388       column_values[cnt++] = B->a[j];
1389     }
1390     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1391     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1392   }
1393   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1394 
1395   /* store the column values to the file */
1396   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1397   if (!rank) {
1398     MPI_Status status;
1399     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1400     for (i=1; i<size; i++) {
1401       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1402       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1403       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1404       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1405       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     }
1407     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1410     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1411     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1412     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1413   }
1414   ierr = PetscFree(column_values);CHKERRQ(ierr);
1415 
1416   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1417   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #include <petscdraw.h>
1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1423 {
1424   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1425   PetscErrorCode    ierr;
1426   PetscMPIInt       rank = aij->rank,size = aij->size;
1427   PetscBool         isdraw,iascii,isbinary;
1428   PetscViewer       sviewer;
1429   PetscViewerFormat format;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1435   if (iascii) {
1436     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1437     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1438       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1439       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1440       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1441       for (i=0; i<(PetscInt)size; i++) {
1442         nmax = PetscMax(nmax,nz[i]);
1443         nmin = PetscMin(nmin,nz[i]);
1444         navg += nz[i];
1445       }
1446       ierr = PetscFree(nz);CHKERRQ(ierr);
1447       navg = navg/size;
1448       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1449       PetscFunctionReturn(0);
1450     }
1451     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1452     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1453       MatInfo   info;
1454       PetscBool inodes;
1455 
1456       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1457       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1458       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1460       if (!inodes) {
1461         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1462                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1463       } else {
1464         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1465                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1466       }
1467       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1468       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1469       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1470       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1471       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1473       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1474       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1475       PetscFunctionReturn(0);
1476     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1477       PetscInt inodecount,inodelimit,*inodes;
1478       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1479       if (inodes) {
1480         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1481       } else {
1482         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1483       }
1484       PetscFunctionReturn(0);
1485     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1486       PetscFunctionReturn(0);
1487     }
1488   } else if (isbinary) {
1489     if (size == 1) {
1490       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1491       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1492     } else {
1493       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1494     }
1495     PetscFunctionReturn(0);
1496   } else if (iascii && size == 1) {
1497     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     PetscFunctionReturn(0);
1500   } else if (isdraw) {
1501     PetscDraw draw;
1502     PetscBool isnull;
1503     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1504     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1505     if (isnull) PetscFunctionReturn(0);
1506   }
1507 
1508   { /* assemble the entire matrix onto first processor */
1509     Mat A = NULL, Av;
1510     IS  isrow,iscol;
1511 
1512     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1513     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1514     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1515     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1516 /*  The commented-out code below does the same job using MatCreateSubMatrices() instead */
1517 /*
1518     Mat *AA, A = NULL, Av;
1519     IS  isrow,iscol;
1520 
1521     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1522     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1523     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1524     if (!rank) {
1525        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1526        A    = AA[0];
1527        Av   = AA[0];
1528     }
1529     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1530 */
1531     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1532     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1533     /*
1534        Every process has to make this call to draw the matrix since the graphics waits are
1535        synchronized across all processes that share the PetscDraw object
1536     */
1537     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1538     if (!rank) {
1539       if (((PetscObject)mat)->name) {
1540         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1541       }
1542       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1543     }
1544     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1545     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1546     ierr = MatDestroy(&A);CHKERRQ(ierr);
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1552 {
1553   PetscErrorCode ierr;
1554   PetscBool      iascii,isdraw,issocket,isbinary;
1555 
1556   PetscFunctionBegin;
1557   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1559   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1560   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1561   if (iascii || isdraw || isbinary || issocket) {
1562     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
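/*
   In outline, the local SOR sweeps below implement a block Jacobi outer iteration:
   each pass scatters the current solution into the ghosted work vector lvec, forms

       bb1 = bb - B*lvec

   and then applies lits SOR sweeps to the local diagonal block A. The off-process
   coupling B therefore enters only through the updated right-hand side.
*/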
1567 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1568 {
1569   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1570   PetscErrorCode ierr;
1571   Vec            bb1 = 0;
1572   PetscBool      hasop;
1573 
1574   PetscFunctionBegin;
1575   if (flag == SOR_APPLY_UPPER) {
1576     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1577     PetscFunctionReturn(0);
1578   }
1579 
1580   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1581     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1582   }
1583 
1584   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1585     if (flag & SOR_ZERO_INITIAL_GUESS) {
1586       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1587       its--;
1588     }
1589 
1590     while (its--) {
1591       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593 
1594       /* update rhs: bb1 = bb - B*x */
1595       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1596       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1597 
1598       /* local sweep */
1599       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1600     }
1601   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1602     if (flag & SOR_ZERO_INITIAL_GUESS) {
1603       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1604       its--;
1605     }
1606     while (its--) {
1607       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609 
1610       /* update rhs: bb1 = bb - B*x */
1611       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1612       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1613 
1614       /* local sweep */
1615       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1616     }
1617   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622     while (its--) {
1623       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626       /* update rhs: bb1 = bb - B*x */
1627       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1628       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1629 
1630       /* local sweep */
1631       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1632     }
1633   } else if (flag & SOR_EISENSTAT) {
1634     Vec xx1;
1635 
1636     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1637     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1638 
1639     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641     if (!mat->diag) {
1642       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1643       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1644     }
1645     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1646     if (hasop) {
1647       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1648     } else {
1649       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1650     }
1651     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1652 
1653     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1654 
1655     /* local sweep */
1656     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1657     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1658     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1659   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1660 
1661   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1662 
1663   matin->factorerrortype = mat->A->factorerrortype;
1664   PetscFunctionReturn(0);
1665 }
1666 
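/*
   The permutation inversion in MatPermute_MPIAIJ() below is done with PetscSF
   reductions: leaf i on each process holds its new global position rstart+i and
   pushes it to root rwant[i], so after the reduce rdest[i] contains the destination
   row of local row i. Hypothetical example with two processes owning two rows each
   and rowp = {2,3,0,1}: rank 0 obtains rdest = {2,3}, rank 1 obtains rdest = {0,1},
   i.e. the two row blocks swap places.
*/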
1667 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1668 {
1669   Mat            aA,aB,Aperm;
1670   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1671   PetscScalar    *aa,*ba;
1672   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1673   PetscSF        rowsf,sf;
1674   IS             parcolp = NULL;
1675   PetscBool      done;
1676   PetscErrorCode ierr;
1677 
1678   PetscFunctionBegin;
1679   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1680   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1681   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1682   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1683 
1684   /* Invert row permutation to find out where my rows should go */
1685   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1686   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1687   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1688   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1689   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1690   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1691 
1692   /* Invert column permutation to find out where my columns should go */
1693   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1694   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1695   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1696   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1697   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1698   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1699   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1700 
1701   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1702   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1703   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1704 
1705   /* Find out where my gcols should go */
1706   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1707   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1708   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1709   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1710   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1711   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1712   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1713   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1714 
1715   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1716   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1717   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1718   for (i=0; i<m; i++) {
1719     PetscInt    row = rdest[i];
1720     PetscMPIInt rowner;
1721     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1722     for (j=ai[i]; j<ai[i+1]; j++) {
1723       PetscInt    col = cdest[aj[j]];
1724       PetscMPIInt cowner;
1725       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1726       if (rowner == cowner) dnnz[i]++;
1727       else onnz[i]++;
1728     }
1729     for (j=bi[i]; j<bi[i+1]; j++) {
1730       PetscInt    col = gcdest[bj[j]];
1731       PetscMPIInt cowner;
1732       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1733       if (rowner == cowner) dnnz[i]++;
1734       else onnz[i]++;
1735     }
1736   }
1737   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1738   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1739   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1740   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1741   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1742 
1743   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1744   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1745   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1746   for (i=0; i<m; i++) {
1747     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1748     PetscInt j0,rowlen;
1749     rowlen = ai[i+1] - ai[i];
1750     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the length m of the repurposed work arrays, so insert in batches */
1751       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1752       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1753     }
1754     rowlen = bi[i+1] - bi[i];
1755     for (j0=j=0; j<rowlen; j0=j) {
1756       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1757       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1758     }
1759   }
1760   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1761   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1762   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1763   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1764   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1765   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1766   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1767   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1768   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1769   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1770   *B = Aperm;
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1775 {
1776   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1781   if (ghosts) *ghosts = aij->garray;
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1786 {
1787   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1788   Mat            A    = mat->A,B = mat->B;
1789   PetscErrorCode ierr;
1790   PetscLogDouble isend[5],irecv[5];
1791 
1792   PetscFunctionBegin;
1793   info->block_size = 1.0;
1794   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1795 
1796   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1797   isend[3] = info->memory;  isend[4] = info->mallocs;
1798 
1799   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1800 
1801   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1802   isend[3] += info->memory;  isend[4] += info->mallocs;
1803   if (flag == MAT_LOCAL) {
1804     info->nz_used      = isend[0];
1805     info->nz_allocated = isend[1];
1806     info->nz_unneeded  = isend[2];
1807     info->memory       = isend[3];
1808     info->mallocs      = isend[4];
1809   } else if (flag == MAT_GLOBAL_MAX) {
1810     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1811 
1812     info->nz_used      = irecv[0];
1813     info->nz_allocated = irecv[1];
1814     info->nz_unneeded  = irecv[2];
1815     info->memory       = irecv[3];
1816     info->mallocs      = irecv[4];
1817   } else if (flag == MAT_GLOBAL_SUM) {
1818     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1819 
1820     info->nz_used      = irecv[0];
1821     info->nz_allocated = irecv[1];
1822     info->nz_unneeded  = irecv[2];
1823     info->memory       = irecv[3];
1824     info->mallocs      = irecv[4];
1825   }
1826   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1827   info->fill_ratio_needed = 0;
1828   info->factor_mallocs    = 0;
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1833 {
1834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1835   PetscErrorCode ierr;
1836 
1837   PetscFunctionBegin;
1838   switch (op) {
1839   case MAT_NEW_NONZERO_LOCATIONS:
1840   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1841   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1842   case MAT_KEEP_NONZERO_PATTERN:
1843   case MAT_NEW_NONZERO_LOCATION_ERR:
1844   case MAT_USE_INODES:
1845   case MAT_IGNORE_ZERO_ENTRIES:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1849     break;
1850   case MAT_ROW_ORIENTED:
1851     MatCheckPreallocated(A,1);
1852     a->roworiented = flg;
1853 
1854     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1855     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1856     break;
1857   case MAT_NEW_DIAGONALS:
1858   case MAT_SORTED_FULL:
1859     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1860     break;
1861   case MAT_IGNORE_OFF_PROC_ENTRIES:
1862     a->donotstash = flg;
1863     break;
1864   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1865   case MAT_SPD:
1866   case MAT_SYMMETRIC:
1867   case MAT_STRUCTURALLY_SYMMETRIC:
1868   case MAT_HERMITIAN:
1869   case MAT_SYMMETRY_ETERNAL:
1870     break;
1871   case MAT_SUBMAT_SINGLEIS:
1872     A->submat_singleis = flg;
1873     break;
1874   case MAT_STRUCTURE_ONLY:
1875     /* The option is handled directly by MatSetOption() */
1876     break;
1877   default:
1878     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
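/*
   MatGetRow_MPIAIJ() below merges one row of the diagonal block A and of the
   off-diagonal block B into a single row sorted by global column. B stores its
   columns compressed, so cmap = garray translates them back to global indices;
   the merged row is (B entries left of the diagonal block, all A entries shifted
   by cstart, B entries right of the diagonal block).
*/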
1883 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1884 {
1885   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1886   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1887   PetscErrorCode ierr;
1888   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1889   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1890   PetscInt       *cmap,*idx_p;
1891 
1892   PetscFunctionBegin;
1893   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1894   mat->getrowactive = PETSC_TRUE;
1895 
1896   if (!mat->rowvalues && (idx || v)) {
1897     /*
1898         allocate enough space to hold information from the longest row.
1899     */
1900     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1901     PetscInt   max = 1,tmp;
1902     for (i=0; i<matin->rmap->n; i++) {
1903       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1904       if (max < tmp) max = tmp;
1905     }
1906     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1907   }
1908 
1909   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1910   lrow = row - rstart;
1911 
1912   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1913   if (!v)   {pvA = 0; pvB = 0;}
1914   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1915   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1916   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1917   nztot = nzA + nzB;
1918 
1919   cmap = mat->garray;
1920   if (v  || idx) {
1921     if (nztot) {
1922       /* Sort by increasing column numbers, assuming A and B already sorted */
1923       PetscInt imark = -1;
1924       if (v) {
1925         *v = v_p = mat->rowvalues;
1926         for (i=0; i<nzB; i++) {
1927           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1928           else break;
1929         }
1930         imark = i;
1931         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1932         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1933       }
1934       if (idx) {
1935         *idx = idx_p = mat->rowindices;
1936         if (imark > -1) {
1937           for (i=0; i<imark; i++) {
1938             idx_p[i] = cmap[cworkB[i]];
1939           }
1940         } else {
1941           for (i=0; i<nzB; i++) {
1942             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1943             else break;
1944           }
1945           imark = i;
1946         }
1947         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1948         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1949       }
1950     } else {
1951       if (idx) *idx = 0;
1952       if (v)   *v   = 0;
1953     }
1954   }
1955   *nz  = nztot;
1956   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1957   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1962 {
1963   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1964 
1965   PetscFunctionBegin;
1966   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1967   aij->getrowactive = PETSC_FALSE;
1968   PetscFunctionReturn(0);
1969 }
1970 
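/*
   Norms computed below, with a_ij the entries of the global matrix:

     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)  local sums followed by an Allreduce
     NORM_1:         max_j sum_i |a_ij|     max column sum; needs a length-N reduction
     NORM_INFINITY:  max_i sum_j |a_ij|     max row sum; rows are local, so only a max reduction
*/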
1971 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1972 {
1973   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1974   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1975   PetscErrorCode ierr;
1976   PetscInt       i,j,cstart = mat->cmap->rstart;
1977   PetscReal      sum = 0.0;
1978   MatScalar      *v;
1979 
1980   PetscFunctionBegin;
1981   if (aij->size == 1) {
1982     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1983   } else {
1984     if (type == NORM_FROBENIUS) {
1985       v = amat->a;
1986       for (i=0; i<amat->nz; i++) {
1987         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1988       }
1989       v = bmat->a;
1990       for (i=0; i<bmat->nz; i++) {
1991         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1992       }
1993       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1994       *norm = PetscSqrtReal(*norm);
1995       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1996     } else if (type == NORM_1) { /* max column norm */
1997       PetscReal *tmp,*tmp2;
1998       PetscInt  *jj,*garray = aij->garray;
1999       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
2000       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
2001       *norm = 0.0;
2002       v     = amat->a; jj = amat->j;
2003       for (j=0; j<amat->nz; j++) {
2004         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2005       }
2006       v = bmat->a; jj = bmat->j;
2007       for (j=0; j<bmat->nz; j++) {
2008         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2009       }
2010       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2011       for (j=0; j<mat->cmap->N; j++) {
2012         if (tmp2[j] > *norm) *norm = tmp2[j];
2013       }
2014       ierr = PetscFree(tmp);CHKERRQ(ierr);
2015       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2016       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2017     } else if (type == NORM_INFINITY) { /* max row norm */
2018       PetscReal ntemp = 0.0;
2019       for (j=0; j<aij->A->rmap->n; j++) {
2020         v   = amat->a + amat->i[j];
2021         sum = 0.0;
2022         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2023           sum += PetscAbsScalar(*v); v++;
2024         }
2025         v = bmat->a + bmat->i[j];
2026         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2027           sum += PetscAbsScalar(*v); v++;
2028         }
2029         if (sum > ntemp) ntemp = sum;
2030       }
2031       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2032       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2033     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2034   }
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2039 {
2040   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2041   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2042   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2043   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2044   PetscErrorCode  ierr;
2045   Mat             B,A_diag,*B_diag;
2046   const MatScalar *array;
2047 
2048   PetscFunctionBegin;
2049   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2050   ai = Aloc->i; aj = Aloc->j;
2051   bi = Bloc->i; bj = Bloc->j;
2052   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2053     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2054     PetscSFNode          *oloc;
2055     PETSC_UNUSED PetscSF sf;
2056 
2057     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2058     /* compute d_nnz for preallocation */
2059     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2060     for (i=0; i<ai[ma]; i++) {
2061       d_nnz[aj[i]]++;
2062     }
2063     /* compute local off-diagonal contributions */
2064     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2065     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2066     /* map those to global */
2067     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2068     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2069     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2070     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2071     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2072     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2073     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2074 
2075     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2076     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2077     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2078     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2079     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2080     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2081   } else {
2082     B    = *matout;
2083     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2084   }
2085 
2086   b           = (Mat_MPIAIJ*)B->data;
2087   A_diag      = a->A;
2088   B_diag      = &b->A;
2089   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2090   A_diag_ncol = A_diag->cmap->N;
2091   B_diag_ilen = sub_B_diag->ilen;
2092   B_diag_i    = sub_B_diag->i;
2093 
2094   /* Set ilen for diagonal of B */
2095   for (i=0; i<A_diag_ncol; i++) {
2096     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2097   }
2098 
2099   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2100      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2101   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2102 
2103   /* copy over the B part */
2104   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2105   array = Bloc->a;
2106   row   = A->rmap->rstart;
2107   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2108   cols_tmp = cols;
2109   for (i=0; i<mb; i++) {
2110     ncol = bi[i+1]-bi[i];
2111     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2112     row++;
2113     array += ncol; cols_tmp += ncol;
2114   }
2115   ierr = PetscFree(cols);CHKERRQ(ierr);
2116 
2117   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2118   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2119   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2120     *matout = B;
2121   } else {
2122     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2123   }
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2128 {
2129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2130   Mat            a    = aij->A,b = aij->B;
2131   PetscErrorCode ierr;
2132   PetscInt       s1,s2,s3;
2133 
2134   PetscFunctionBegin;
2135   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2136   if (rr) {
2137     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2138     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2139     /* Overlap communication with computation. */
2140     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2141   }
2142   if (ll) {
2143     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2144     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2145     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2146   }
2147   /* scale the diagonal block */
2148   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2149 
2150   if (rr) {
2151     /* Do a scatter end and then right scale the off-diagonal block */
2152     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2153     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2154   }
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2159 {
2160   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2161   PetscErrorCode ierr;
2162 
2163   PetscFunctionBegin;
2164   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2169 {
2170   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2171   Mat            a,b,c,d;
2172   PetscBool      flg;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   a = matA->A; b = matA->B;
2177   c = matB->A; d = matB->B;
2178 
2179   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2180   if (flg) {
2181     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2182   }
2183   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2191   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2192 
2193   PetscFunctionBegin;
2194   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2195   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2196     /* because of the column compression in the off-process part of the matrix a->B,
2197        the number of columns in a->B and b->B may be different, hence we cannot call
2198        MatCopy() directly on the two parts. If need be, we could provide a more
2199        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
2200        and then copying the submatrices */
2201     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2202   } else {
2203     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2204     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2205   }
2206   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2211 {
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 /*
2220    Computes the number of nonzeros per row needed for preallocation when X and Y
2221    have different nonzero structure.
2222 */
2223 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2224 {
2225   PetscInt       i,j,k,nzx,nzy;
2226 
2227   PetscFunctionBegin;
2228   /* Set the number of nonzeros in the new matrix */
2229   for (i=0; i<m; i++) {
2230     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2231     nzx = xi[i+1] - xi[i];
2232     nzy = yi[i+1] - yi[i];
2233     nnz[i] = 0;
2234     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2235       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2236       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2237       nnz[i]++;
2238     }
2239     for (; k<nzy; k++) nnz[i]++;
2240   }
2241   PetscFunctionReturn(0);
2242 }
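/*
   Worked (hypothetical) example of the merge count above for a single row: if X has
   global columns {0,3,5} and Y has {3,4}, the loop counts 0, then 3 (the duplicate
   in Y is skipped), then 4 and 5, giving nnz = 4 = |{0,3,4,5}|, the union size.
*/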
2243 
2244 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2245 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2246 {
2247   PetscErrorCode ierr;
2248   PetscInt       m = Y->rmap->N;
2249   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2250   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
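/*
   When X and Y share the nonzero pattern, the value arrays x->a and y->a are aligned
   entry for entry, so Y += a*X below reduces to a dense axpy of length nz on each of
   the diagonal and off-diagonal blocks and no matrix assembly is required.
*/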
2257 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2258 {
2259   PetscErrorCode ierr;
2260   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2261   PetscBLASInt   bnz,one=1;
2262   Mat_SeqAIJ     *x,*y;
2263 
2264   PetscFunctionBegin;
2265   if (str == SAME_NONZERO_PATTERN) {
2266     PetscScalar alpha = a;
2267     x    = (Mat_SeqAIJ*)xx->A->data;
2268     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2269     y    = (Mat_SeqAIJ*)yy->A->data;
2270     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2271     x    = (Mat_SeqAIJ*)xx->B->data;
2272     y    = (Mat_SeqAIJ*)yy->B->data;
2273     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2274     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2275     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2276     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin/End(), so the matrix on the GPU
2277        will be updated */
2278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2279     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2280       Y->offloadmask = PETSC_OFFLOAD_CPU;
2281     }
2282 #endif
2283   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2284     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2285   } else {
2286     Mat      B;
2287     PetscInt *nnz_d,*nnz_o;
2288     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2289     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2290     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2291     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2292     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2293     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2294     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2295     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2296     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2297     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2298     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2299     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2300     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2301     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2302   }
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2307 
2308 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2309 {
2310 #if defined(PETSC_USE_COMPLEX)
2311   PetscErrorCode ierr;
2312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2316   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2317 #else
2318   PetscFunctionBegin;
2319 #endif
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2324 {
2325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2330   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2335 {
2336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2341   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2348   PetscErrorCode ierr;
2349   PetscInt       i,*idxb = 0;
2350   PetscScalar    *va,*vb;
2351   Vec            vtmp;
2352 
2353   PetscFunctionBegin;
2354   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2355   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2356   if (idx) {
2357     for (i=0; i<A->rmap->n; i++) {
2358       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2359     }
2360   }
2361 
2362   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2363   if (idx) {
2364     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2365   }
2366   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2367   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2368 
2369   for (i=0; i<A->rmap->n; i++) {
2370     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2371       va[i] = vb[i];
2372       if (idx) idx[i] = a->garray[idxb[i]];
2373     }
2374   }
2375 
2376   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2378   ierr = PetscFree(idxb);CHKERRQ(ierr);
2379   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2384 {
2385   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387   PetscInt       i,*idxb = 0;
2388   PetscScalar    *va,*vb;
2389   Vec            vtmp;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2394   if (idx) {
2395     for (i=0; i<A->rmap->n; i++) {
2396       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2397     }
2398   }
2399 
2400   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2401   if (idx) {
2402     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2403   }
2404   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2405   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2406 
2407   for (i=0; i<A->rmap->n; i++) {
2408     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2409       va[i] = vb[i];
2410       if (idx) idx[i] = a->garray[idxb[i]];
2411     }
2412   }
2413 
2414   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2416   ierr = PetscFree(idxb);CHKERRQ(ierr);
2417   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2422 {
2423   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2424   PetscInt       n      = A->rmap->n;
2425   PetscInt       cstart = A->cmap->rstart;
2426   PetscInt       *cmap  = mat->garray;
2427   PetscInt       *diagIdx, *offdiagIdx;
2428   Vec            diagV, offdiagV;
2429   PetscScalar    *a, *diagA, *offdiagA;
2430   PetscInt       r;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2436   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2437   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2438   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2440   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2441   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   for (r = 0; r < n; ++r) {
2443     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2444       a[r]   = diagA[r];
2445       idx[r] = cstart + diagIdx[r];
2446     } else {
2447       a[r]   = offdiagA[r];
2448       idx[r] = cmap[offdiagIdx[r]];
2449     }
2450   }
2451   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2454   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2456   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2461 {
2462   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2463   PetscInt       n      = A->rmap->n;
2464   PetscInt       cstart = A->cmap->rstart;
2465   PetscInt       *cmap  = mat->garray;
2466   PetscInt       *diagIdx, *offdiagIdx;
2467   Vec            diagV, offdiagV;
2468   PetscScalar    *a, *diagA, *offdiagA;
2469   PetscInt       r;
2470   PetscErrorCode ierr;
2471 
2472   PetscFunctionBegin;
2473   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2474   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2475   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2476   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2477   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2478   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2479   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2480   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2481   for (r = 0; r < n; ++r) {
2482     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2483       a[r]   = diagA[r];
2484       idx[r] = cstart + diagIdx[r];
2485     } else {
2486       a[r]   = offdiagA[r];
2487       idx[r] = cmap[offdiagIdx[r]];
2488     }
2489   }
2490   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2491   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2492   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2494   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2495   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2500 {
2501   PetscErrorCode ierr;
2502   Mat            *dummy;
2503 
2504   PetscFunctionBegin;
2505   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2506   *newmat = *dummy;
2507   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2512 {
2513   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2514   PetscErrorCode ierr;
2515 
2516   PetscFunctionBegin;
2517   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2518   A->factorerrortype = a->A->factorerrortype;
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2523 {
2524   PetscErrorCode ierr;
2525   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2526 
2527   PetscFunctionBegin;
2528   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2529   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2530   if (x->assembled) {
2531     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2532   } else {
2533     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2534   }
2535   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2536   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2541 {
2542   PetscFunctionBegin;
2543   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2544   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /*@
2549    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2550 
2551    Collective on Mat
2552 
2553    Input Parameters:
2554 +    A - the matrix
2555 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (by default it is not)
2556 
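   Options Database Keys:
.  -mat_increase_overlap_scalable - use the scalable algorithm (processed in MatSetFromOptions_MPIAIJ() in this file)

   Example Usage:
   A minimal sketch (assuming A is an assembled MATMPIAIJ matrix):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
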
2557    Level: advanced
2558 
2559 @*/
2560 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2561 {
2562   PetscErrorCode       ierr;
2563 
2564   PetscFunctionBegin;
2565   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2570 {
2571   PetscErrorCode       ierr;
2572   PetscBool            sc = PETSC_FALSE,flg;
2573 
2574   PetscFunctionBegin;
2575   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2576   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2577   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2578   if (flg) {
2579     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2580   }
2581   ierr = PetscOptionsTail();CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2586 {
2587   PetscErrorCode ierr;
2588   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2589   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2590 
2591   PetscFunctionBegin;
2592   if (!Y->preallocated) {
2593     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2594   } else if (!aij->nz) {
2595     PetscInt nonew = aij->nonew;
2596     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2597     aij->nonew = nonew;
2598   }
2599   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2604 {
2605   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2606   PetscErrorCode ierr;
2607 
2608   PetscFunctionBegin;
2609   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2610   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2611   if (d) {
2612     PetscInt rstart;
2613     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2614     *d += rstart;
2615 
2616   }
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2621 {
2622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2623   PetscErrorCode ierr;
2624 
2625   PetscFunctionBegin;
2626   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 /* -------------------------------------------------------------------*/
2631 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2632                                        MatGetRow_MPIAIJ,
2633                                        MatRestoreRow_MPIAIJ,
2634                                        MatMult_MPIAIJ,
2635                                 /* 4*/ MatMultAdd_MPIAIJ,
2636                                        MatMultTranspose_MPIAIJ,
2637                                        MatMultTransposeAdd_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*10*/ 0,
2642                                        0,
2643                                        0,
2644                                        MatSOR_MPIAIJ,
2645                                        MatTranspose_MPIAIJ,
2646                                 /*15*/ MatGetInfo_MPIAIJ,
2647                                        MatEqual_MPIAIJ,
2648                                        MatGetDiagonal_MPIAIJ,
2649                                        MatDiagonalScale_MPIAIJ,
2650                                        MatNorm_MPIAIJ,
2651                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2652                                        MatAssemblyEnd_MPIAIJ,
2653                                        MatSetOption_MPIAIJ,
2654                                        MatZeroEntries_MPIAIJ,
2655                                 /*24*/ MatZeroRows_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*29*/ MatSetUp_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        MatGetDiagonalBlock_MPIAIJ,
2664                                        0,
2665                                 /*34*/ MatDuplicate_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*39*/ MatAXPY_MPIAIJ,
2671                                        MatCreateSubMatrices_MPIAIJ,
2672                                        MatIncreaseOverlap_MPIAIJ,
2673                                        MatGetValues_MPIAIJ,
2674                                        MatCopy_MPIAIJ,
2675                                 /*44*/ MatGetRowMax_MPIAIJ,
2676                                        MatScale_MPIAIJ,
2677                                        MatShift_MPIAIJ,
2678                                        MatDiagonalSet_MPIAIJ,
2679                                        MatZeroRowsColumns_MPIAIJ,
2680                                 /*49*/ MatSetRandom_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2686                                        0,
2687                                        MatSetUnfactored_MPIAIJ,
2688                                        MatPermute_MPIAIJ,
2689                                        0,
2690                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2691                                        MatDestroy_MPIAIJ,
2692                                        MatView_MPIAIJ,
2693                                        0,
2694                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2695                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2696                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2701                                        MatGetRowMinAbs_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                 /*75*/ MatFDColoringApply_AIJ,
2707                                        MatSetFromOptions_MPIAIJ,
2708                                        0,
2709                                        0,
2710                                        MatFindZeroDiagonals_MPIAIJ,
2711                                 /*80*/ 0,
2712                                        0,
2713                                        0,
2714                                 /*83*/ MatLoad_MPIAIJ,
2715                                        MatIsSymmetric_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2721                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2722                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2723                                        MatPtAP_MPIAIJ_MPIAIJ,
2724                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2725                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                        MatPinToCPU_MPIAIJ,
2730                                 /*99*/ 0,
2731                                        0,
2732                                        0,
2733                                        MatConjugate_MPIAIJ,
2734                                        0,
2735                                 /*104*/MatSetValuesRow_MPIAIJ,
2736                                        MatRealPart_MPIAIJ,
2737                                        MatImaginaryPart_MPIAIJ,
2738                                        0,
2739                                        0,
2740                                 /*109*/0,
2741                                        0,
2742                                        MatGetRowMin_MPIAIJ,
2743                                        0,
2744                                        MatMissingDiagonal_MPIAIJ,
2745                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2746                                        0,
2747                                        MatGetGhosts_MPIAIJ,
2748                                        0,
2749                                        0,
2750                                 /*119*/0,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                        MatGetMultiProcBlock_MPIAIJ,
2755                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2756                                        MatGetColumnNorms_MPIAIJ,
2757                                        MatInvertBlockDiagonal_MPIAIJ,
2758                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2759                                        MatCreateSubMatricesMPI_MPIAIJ,
2760                                 /*129*/0,
2761                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2762                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2763                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2764                                        0,
2765                                 /*134*/0,
2766                                        0,
2767                                        MatRARt_MPIAIJ_MPIAIJ,
2768                                        0,
2769                                        0,
2770                                 /*139*/MatSetBlockSizes_MPIAIJ,
2771                                        0,
2772                                        0,
2773                                        MatFDColoringSetUp_MPIXAIJ,
2774                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2775                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2776 };
2777 
2778 /* ----------------------------------------------------------------------------------------*/
2779 
2780 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2781 {
2782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2787   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2792 {
2793   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2798   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
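/*
   A minimal usage sketch for the store/retrieve pair above, via the public interface
   (not taken from this file; the matrix name mat is a placeholder). MatStoreValues()
   requires that no new nonzero locations be created afterwards:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... alter the numerical values of mat ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   <-- restores the stored values
*/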
2802 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2803 {
2804   Mat_MPIAIJ     *b;
2805   PetscErrorCode ierr;
2806   PetscMPIInt    size;
2807 
2808   PetscFunctionBegin;
2809   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2810   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2811   b = (Mat_MPIAIJ*)B->data;
2812 
2813 #if defined(PETSC_USE_CTABLE)
2814   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2815 #else
2816   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2817 #endif
2818   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2819   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2820   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2821 
2822   /* Because B may have been resized, we simply destroy it and create a new one each time */
2823   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2824   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2825   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2826   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2827   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2828   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2829   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2830 
2831   if (!B->preallocated) {
2832     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2833     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2834     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2835     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2836     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2837   }
2838 
2839   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2840   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2841   B->preallocated  = PETSC_TRUE;
2842   B->was_assembled = PETSC_FALSE;
2843   B->assembled     = PETSC_FALSE;
2844   PetscFunctionReturn(0);
2845 }
2846 
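/*
   A hedged sketch of how the preallocation routine above is typically driven through
   the public API (B, m, and n are placeholders supplied by the caller):

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,3,NULL,2,NULL);CHKERRQ(ierr);

   Here 3 and 2 are per-row upper bounds on the nonzeros in the diagonal block (columns
   owned by this process) and the off-diagonal block, respectively; exact per-row counts
   can be passed through the d_nnz/o_nnz arrays instead.
*/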
2847 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2848 {
2849   Mat_MPIAIJ     *b;
2850   PetscErrorCode ierr;
2851 
2852   PetscFunctionBegin;
2853   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2854   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2855   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2856   b = (Mat_MPIAIJ*)B->data;
2857 
2858 #if defined(PETSC_USE_CTABLE)
2859   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2860 #else
2861   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2862 #endif
2863   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2864   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2865   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2866 
2867   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2868   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2869   B->preallocated  = PETSC_TRUE;
2870   B->was_assembled = PETSC_FALSE;
2871   B->assembled = PETSC_FALSE;
2872   PetscFunctionReturn(0);
2873 }
2874 
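/*
   A hedged usage sketch for the duplication routine below, via the public interface:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

   MAT_DO_NOT_COPY_VALUES and MAT_SHARE_NONZERO_PATTERN are the other MatDuplicateOption
   values; the cpvalues argument below receives whichever one the caller passed.
*/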
2875 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2876 {
2877   Mat            mat;
2878   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2879   PetscErrorCode ierr;
2880 
2881   PetscFunctionBegin;
2882   *newmat = 0;
2883   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2884   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2885   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2886   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2887   a       = (Mat_MPIAIJ*)mat->data;
2888 
2889   mat->factortype   = matin->factortype;
2890   mat->assembled    = PETSC_TRUE;
2891   mat->insertmode   = NOT_SET_VALUES;
2892   mat->preallocated = PETSC_TRUE;
2893 
2894   a->size         = oldmat->size;
2895   a->rank         = oldmat->rank;
2896   a->donotstash   = oldmat->donotstash;
2897   a->roworiented  = oldmat->roworiented;
2898   a->rowindices   = 0;
2899   a->rowvalues    = 0;
2900   a->getrowactive = PETSC_FALSE;
2901 
2902   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2903   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2904 
2905   if (oldmat->colmap) {
2906 #if defined(PETSC_USE_CTABLE)
2907     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2908 #else
2909     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2910     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2911     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2912 #endif
2913   } else a->colmap = 0;
2914   if (oldmat->garray) {
2915     PetscInt len;
2916     len  = oldmat->B->cmap->n;
2917     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2918     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2919     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2920   } else a->garray = 0;
2921 
2922   /* MatDuplicate() may be called with a non-assembled matrix, since it only
2923      requires the matrix to be preallocated; this can happen, for example,
2924      inside DMCreateMatrix_Shell() */
2925   if (oldmat->lvec) {
2926     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2927     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2928   }
2929   if (oldmat->Mvctx) {
2930     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2931     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2932   }
2933   if (oldmat->Mvctx_mpi1) {
2934     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2935     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2936   }
2937 
2938   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2939   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2940   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2941   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2942   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2943   *newmat = mat;
2944   PetscFunctionReturn(0);
2945 }
2946 
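/*
   A hedged usage sketch for the loader below ("matrix.dat" is a hypothetical file name):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/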
2947 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2948 {
2949   PetscBool      isbinary, ishdf5;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2954   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2955   /* force binary viewer to load .info file if it has not yet done so */
2956   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2957   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2958   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2959   if (isbinary) {
2960     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2961   } else if (ishdf5) {
2962 #if defined(PETSC_HAVE_HDF5)
2963     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2964 #else
2965     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2966 #endif
2967   } else {
2968     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2969   }
2970   PetscFunctionReturn(0);
2971 }
2972 
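/*
   The binary format consumed below: a header of four PetscInt (MAT_FILE_CLASSID, number
   of rows M, number of columns N, total number of nonzeros; a negative nonzero count
   flags a special on-disk format this loader rejects), followed by the nonzero count of
   each row, then all column indices, then all numerical values. Rank 0 reads the file
   and ships each process its portion with MPIULong_Send().
*/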
2973 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2974 {
2975   PetscScalar    *vals,*svals;
2976   MPI_Comm       comm;
2977   PetscErrorCode ierr;
2978   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2979   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2980   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2981   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2982   PetscInt       cend,cstart,n,*rowners;
2983   int            fd;
2984   PetscInt       bs = newMat->rmap->bs;
2985 
2986   PetscFunctionBegin;
2987   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2988   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2989   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2990   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2991   if (!rank) {
2992     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2993     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2994     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2995   }
2996 
2997   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2998   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2999   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3000   if (bs < 0) bs = 1;
3001 
3002   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3003   M    = header[1]; N = header[2];
3004 
3005   /* If global sizes are set, check if they are consistent with that given in the file */
3006   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
3007   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
3008 
3009   /* determine ownership of all (block) rows */
3010   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3011   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3012   else m = newMat->rmap->n; /* Set by user */
3013 
3014   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3015   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3016 
3017   /* First process needs enough room for the process with the most rows */
3018   if (!rank) {
3019     mmax = rowners[1];
3020     for (i=2; i<=size; i++) {
3021       mmax = PetscMax(mmax, rowners[i]);
3022     }
3023   } else mmax = -1;             /* unused, but compilers complain */
3024 
3025   rowners[0] = 0;
3026   for (i=2; i<=size; i++) {
3027     rowners[i] += rowners[i-1];
3028   }
3029   rstart = rowners[rank];
3030   rend   = rowners[rank+1];
3031 
3032   /* distribute row lengths to all processors */
3033   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3034   if (!rank) {
3035     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3036     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3037     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3038     for (j=0; j<m; j++) {
3039       procsnz[0] += ourlens[j];
3040     }
3041     for (i=1; i<size; i++) {
3042       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3043       /* calculate the number of nonzeros on each processor */
3044       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3045         procsnz[i] += rowlengths[j];
3046       }
3047       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3048     }
3049     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3050   } else {
3051     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3052   }
3053 
3054   if (!rank) {
3055     /* determine max buffer needed and allocate it */
3056     maxnz = 0;
3057     for (i=0; i<size; i++) {
3058       maxnz = PetscMax(maxnz,procsnz[i]);
3059     }
3060     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3061 
3062     /* read in my part of the matrix column indices  */
3063     nz   = procsnz[0];
3064     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3065     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3066 
3067     /* read in everyone else's and ship off */
3068     for (i=1; i<size; i++) {
3069       nz   = procsnz[i];
3070       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3071       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3072     }
3073     ierr = PetscFree(cols);CHKERRQ(ierr);
3074   } else {
3075     /* determine buffer space needed for message */
3076     nz = 0;
3077     for (i=0; i<m; i++) {
3078       nz += ourlens[i];
3079     }
3080     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3081 
3082     /* receive message of column indices */
3083     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3084   }
3085 
3086   /* determine column ownership if matrix is not square */
3087   if (N != M) {
3088     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3089     else n = newMat->cmap->n;
3090     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3091     cstart = cend - n;
3092   } else {
3093     cstart = rstart;
3094     cend   = rend;
3095     n      = cend - cstart;
3096   }
3097 
3098   /* loop over local rows, determining number of off-diagonal entries */
3099   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3100   jj   = 0;
3101   for (i=0; i<m; i++) {
3102     for (j=0; j<ourlens[i]; j++) {
3103       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3104       jj++;
3105     }
3106   }
3107 
3108   for (i=0; i<m; i++) {
3109     ourlens[i] -= offlens[i];
3110   }
3111   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3112 
3113   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3114 
3115   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3116 
3117   for (i=0; i<m; i++) {
3118     ourlens[i] += offlens[i];
3119   }
3120 
3121   if (!rank) {
3122     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3123 
3124     /* read in my part of the matrix numerical values  */
3125     nz   = procsnz[0];
3126     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3127 
3128     /* insert into matrix */
3129     jj      = rstart;
3130     smycols = mycols;
3131     svals   = vals;
3132     for (i=0; i<m; i++) {
3133       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3134       smycols += ourlens[i];
3135       svals   += ourlens[i];
3136       jj++;
3137     }
3138 
3139     /* read in other processors and ship out */
3140     for (i=1; i<size; i++) {
3141       nz   = procsnz[i];
3142       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3143       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3144     }
3145     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3146   } else {
3147     /* receive numeric values */
3148     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3149 
3150     /* receive message of values */
3151     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3152 
3153     /* insert into matrix */
3154     jj      = rstart;
3155     smycols = mycols;
3156     svals   = vals;
3157     for (i=0; i<m; i++) {
3158       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3159       smycols += ourlens[i];
3160       svals   += ourlens[i];
3161       jj++;
3162     }
3163   }
3164   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3165   ierr = PetscFree(vals);CHKERRQ(ierr);
3166   ierr = PetscFree(mycols);CHKERRQ(ierr);
3167   ierr = PetscFree(rowners);CHKERRQ(ierr);
3168   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3169   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3170   PetscFunctionReturn(0);
3171 }
3172 
3173 /* Not scalable because of ISAllGather() unless getting all columns. */
3174 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3175 {
3176   PetscErrorCode ierr;
3177   IS             iscol_local;
3178   PetscBool      isstride;
3179   PetscMPIInt    lisstride=0,gisstride;
3180 
3181   PetscFunctionBegin;
3182   /* check if we are grabbing all columns */
3183   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3184 
3185   if (isstride) {
3186     PetscInt  start,len,mstart,mend;
3187     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3188     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3189     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mend);CHKERRQ(ierr);
3190     if (mstart == start && mend-mstart == len) lisstride = 1;
3191   }
3192 
3193   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3194   if (gisstride) {
3195     PetscInt N;
3196     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3197     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3198     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3199     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3200   } else {
3201     PetscInt cbs;
3202     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3203     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3204     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3205   }
3206 
3207   *isseq = iscol_local;
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 /*
3212  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local
3213  with the global size of iscol (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3214 
3215  Input Parameters:
3216    mat - matrix
3217    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3218            i.e., mat->rstart <= isrow[i] < mat->rend
3219    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3220            i.e., mat->cstart <= iscol[i] < mat->cend
3221  Output Parameters:
3222    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3223    iscol_o - sequential column index set for retrieving mat->B
3224    garray - column map; garray[i] indicates the global location of iscol_o[i] in iscol
3225  */
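/*
   A small illustrative example (hypothetical data): suppose this process owns columns
   [4,8) of mat, its part of iscol selects the global columns {2,5,7,9}, this is the
   first process (so its iscol entries occupy global positions 0..3), and columns 2 and
   9 appear in the off-diagonal block B. Then iscol_d = {1,3} (local positions of
   columns 5 and 7), iscol_o selects columns 2 and 9 by their local positions in B,
   and garray = {0,3}, their positions within the global iscol.
*/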
3226 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3227 {
3228   PetscErrorCode ierr;
3229   Vec            x,cmap;
3230   const PetscInt *is_idx;
3231   PetscScalar    *xarray,*cmaparray;
3232   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3233   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3234   Mat            B=a->B;
3235   Vec            lvec=a->lvec,lcmap;
3236   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3237   MPI_Comm       comm;
3238   VecScatter     Mvctx=a->Mvctx;
3239 
3240   PetscFunctionBegin;
3241   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3242   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3243 
3244   /* (1) iscol selects a subset of the columns of mat; mark them in a full-length vector x, padding unselected entries with -1 */
3245   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3246   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3247   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3248   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3249 
3250   /* Get start indices */
3251   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3252   isstart -= ncols;
3253   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3254 
3255   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3256   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3257   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3258   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3259   for (i=0; i<ncols; i++) {
3260     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3261     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3262     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3263   }
3264   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3265   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3266   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3267 
3268   /* Get iscol_d */
3269   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3270   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3271   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3272 
3273   /* Get isrow_d */
3274   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3275   rstart = mat->rmap->rstart;
3276   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3277   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3278   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3279   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3280 
3281   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3282   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3283   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3284 
3285   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3286   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3287   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3288 
3289   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3290 
3291   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3292   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3293 
3294   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3295   /* off-process column indices */
3296   count = 0;
3297   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3298   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3299 
3300   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3301   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3302   for (i=0; i<Bn; i++) {
3303     if (PetscRealPart(xarray[i]) > -1.0) {
3304       idx[count]   = i;                                       /* local column index in off-diagonal part B */
3305       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);   /* column index in submat */
3306       count++;
3307     }
3308   }
3309   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3310   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3311 
3312   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3313   /* cannot ensure iscol_o has same blocksize as iscol! */
3314 
3315   ierr = PetscFree(idx);CHKERRQ(ierr);
3316   *garray = cmap1;
3317 
3318   ierr = VecDestroy(&x);CHKERRQ(ierr);
3319   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3320   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3325 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3326 {
3327   PetscErrorCode ierr;
3328   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3329   Mat            M = NULL;
3330   MPI_Comm       comm;
3331   IS             iscol_d,isrow_d,iscol_o;
3332   Mat            Asub = NULL,Bsub = NULL;
3333   PetscInt       n;
3334 
3335   PetscFunctionBegin;
3336   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3337 
3338   if (call == MAT_REUSE_MATRIX) {
3339     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3340     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3341     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3342 
3343     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3344     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3345 
3346     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3347     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3348 
3349     /* Update diagonal and off-diagonal portions of submat */
3350     asub = (Mat_MPIAIJ*)(*submat)->data;
3351     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3352     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3353     if (n) {
3354       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3355     }
3356     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3357     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3358 
3359   } else { /* call == MAT_INITIAL_MATRIX */
3360     const PetscInt *garray;
3361     PetscInt        BsubN;
3362 
3363     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3364     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3365 
3366     /* Create local submatrices Asub and Bsub */
3367     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3368     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3369 
3370     /* Create submatrix M */
3371     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3372 
3373     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3374     asub = (Mat_MPIAIJ*)M->data;
3375 
3376     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3377     n = asub->B->cmap->N;
3378     if (BsubN > n) {
3379       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3380       const PetscInt *idx;
3381       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3382       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3383 
3384       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3385       j = 0;
3386       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3387       for (i=0; i<n; i++) {
3388         if (j >= BsubN) break;
3389         while (subgarray[i] > garray[j]) j++;
3390 
3391         if (subgarray[i] == garray[j]) {
3392           idx_new[i] = idx[j++];
3393         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3394       }
3395       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3396 
3397       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3398       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3399 
3400     } else if (BsubN < n) {
3401       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3402     }
3403 
3404     ierr = PetscFree(garray);CHKERRQ(ierr);
3405     *submat = M;
3406 
3407     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3408     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3409     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3410 
3411     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3412     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3413 
3414     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3415     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3416   }
3417   PetscFunctionReturn(0);
3418 }
3419 
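/*
   A hedged usage sketch for the extraction routine below, via the public interface
   (isrow and iscol are parallel index sets built by the caller):

     Mat sub;
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change values in mat, keeping the same nonzero pattern and index sets ...
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/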
3420 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3421 {
3422   PetscErrorCode ierr;
3423   IS             iscol_local=NULL,isrow_d;
3424   PetscInt       csize;
3425   PetscInt       n,i,j,start,end;
3426   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3427   MPI_Comm       comm;
3428 
3429   PetscFunctionBegin;
3430   /* If isrow has the same processor distribution as mat,
3431      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3432   if (call == MAT_REUSE_MATRIX) {
3433     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3434     if (isrow_d) {
3435       sameRowDist  = PETSC_TRUE;
3436       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3437     } else {
3438       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3439       if (iscol_local) {
3440         sameRowDist  = PETSC_TRUE;
3441         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3442       }
3443     }
3444   } else {
3445     /* Check if isrow has same processor distribution as mat */
3446     sameDist[0] = PETSC_FALSE;
3447     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3448     if (!n) {
3449       sameDist[0] = PETSC_TRUE;
3450     } else {
3451       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3452       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3453       if (i >= start && j < end) {
3454         sameDist[0] = PETSC_TRUE;
3455       }
3456     }
3457 
3458     /* Check if iscol has same processor distribution as mat */
3459     sameDist[1] = PETSC_FALSE;
3460     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3461     if (!n) {
3462       sameDist[1] = PETSC_TRUE;
3463     } else {
3464       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3465       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3466       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3467     }
3468 
3469     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3470     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3471     sameRowDist = tsameDist[0];
3472   }
3473 
3474   if (sameRowDist) {
3475     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3476       /* isrow and iscol have same processor distribution as mat */
3477       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3478       PetscFunctionReturn(0);
3479     } else { /* sameRowDist */
3480       /* isrow has same processor distribution as mat */
3481       if (call == MAT_INITIAL_MATRIX) {
3482         PetscBool sorted;
3483         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3484         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3485         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3486         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3487 
3488         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3489         if (sorted) {
3490           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may have duplicate indices */
3491           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3492           PetscFunctionReturn(0);
3493         }
3494       } else { /* call == MAT_REUSE_MATRIX */
3495         IS    iscol_sub;
3496         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3497         if (iscol_sub) {
3498           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3499           PetscFunctionReturn(0);
3500         }
3501       }
3502     }
3503   }
3504 
3505   /* General case: iscol -> iscol_local which has global size of iscol */
3506   if (call == MAT_REUSE_MATRIX) {
3507     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3508     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3509   } else {
3510     if (!iscol_local) {
3511       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3512     }
3513   }
3514 
3515   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3516   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3517 
3518   if (call == MAT_INITIAL_MATRIX) {
3519     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3520     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3521   }
3522   PetscFunctionReturn(0);
3523 }
3524 
3525 /*@C
3526      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3527          and "off-diagonal" part of the matrix in CSR format.
3528 
3529    Collective
3530 
3531    Input Parameters:
3532 +  comm - MPI communicator
3533 .  A - "diagonal" portion of matrix
3534 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3535 -  garray - global index of B columns
3536 
3537    Output Parameter:
3538 .   mat - the matrix, with input A as its local diagonal matrix

3539    Level: advanced
3540 
3541    Notes:
3542        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3543        A becomes part of output mat, and B is destroyed by this routine. The user must not use A or B afterwards.
3544 
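   Example Usage:
   A minimal sketch (Aloc, Bloc, and g are placeholders the caller must have built;
   g[] lists the global column of each column of Bloc):
.vb
   Mat C;
   ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,g,&C);CHKERRQ(ierr);
.ve
   After the call, Aloc and Bloc must not be used by the caller.
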
3545 .seealso: MatCreateMPIAIJWithSplitArrays()
3546 @*/
3547 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3548 {
3549   PetscErrorCode ierr;
3550   Mat_MPIAIJ     *maij;
3551   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3552   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3553   PetscScalar    *oa=b->a;
3554   Mat            Bnew;
3555   PetscInt       m,n,N;
3556 
3557   PetscFunctionBegin;
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3560   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3561   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3562   /* The check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3563   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3564 
3565   /* Get global columns of mat */
3566   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567 
3568   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3569   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3570   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3571   maij = (Mat_MPIAIJ*)(*mat)->data;
3572 
3573   (*mat)->preallocated = PETSC_TRUE;
3574 
3575   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3576   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3577 
3578   /* Set A as diagonal portion of *mat */
3579   maij->A = A;
3580 
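  /* map B's local column indices to global column indices via garray */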
3581   nz = oi[m];
3582   for (i=0; i<nz; i++) {
3583     col   = oj[i];
3584     oj[i] = garray[col];
3585   }
3586 
3587   /* Set Bnew as off-diagonal portion of *mat */
3588   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3589   bnew        = (Mat_SeqAIJ*)Bnew->data;
3590   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3591   maij->B     = Bnew;
3592 
3593   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3594 
3595   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3596   b->free_a       = PETSC_FALSE;
3597   b->free_ij      = PETSC_FALSE;
3598   ierr = MatDestroy(&B);CHKERRQ(ierr);
3599 
3600   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3601   bnew->free_a       = PETSC_TRUE;
3602   bnew->free_ij      = PETSC_TRUE;
3603 
3604   /* condense columns of maij->B */
3605   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3606   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3608   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3609   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3610   PetscFunctionReturn(0);
3611 }
3612 
3613 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3614 
3615 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3616 {
3617   PetscErrorCode ierr;
3618   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3619   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3620   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3621   Mat            M,Msub,B=a->B;
3622   MatScalar      *aa;
3623   Mat_SeqAIJ     *aij;
3624   PetscInt       *garray = a->garray,*colsub,Ncols;
3625   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3626   IS             iscol_sub,iscmap;
3627   const PetscInt *is_idx,*cmap;
3628   PetscBool      allcolumns=PETSC_FALSE;
3629   MPI_Comm       comm;
3630 
3631   PetscFunctionBegin;
3632   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3633 
3634   if (call == MAT_REUSE_MATRIX) {
3635     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3636     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3637     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3638 
3639     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3640     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3641 
3642     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3643     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3644 
3645     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3646 
3647   } else { /* call == MAT_INITIAL_MATRIX */
3648     PetscBool flg;
3649 
3650     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3651     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3652 
3653     /* (1) iscol -> nonscalable iscol_local */
3654     /* Check for special case: each processor gets entire matrix columns */
3655     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3656     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3657     if (allcolumns) {
3658       iscol_sub = iscol_local;
3659       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3660       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3661 
3662     } else {
3663       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may have duplicate indices */
3664       PetscInt *idx,*cmap1,k;
3665       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3666       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3667       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3668       count = 0;
3669       k     = 0;
3670       for (i=0; i<Ncols; i++) {
3671         j = is_idx[i];
3672         if (j >= cstart && j < cend) {
3673           /* diagonal part of mat */
3674           idx[count]     = j;
3675           cmap1[count++] = i; /* column index in submat */
3676         } else if (Bn) {
3677           /* off-diagonal part of mat */
3678           if (j == garray[k]) {
3679             idx[count]     = j;
3680             cmap1[count++] = i;  /* column index in submat */
3681           } else if (j > garray[k]) {
3682             while (j > garray[k] && k < Bn-1) k++;
3683             if (j == garray[k]) {
3684               idx[count]     = j;
3685               cmap1[count++] = i; /* column index in submat */
3686             }
3687           }
3688         }
3689       }
3690       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3691 
3692       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3693       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3694       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3695 
3696       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3697     }
3698 
3699     /* (3) Create sequential Msub */
3700     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3701   }
3702 
3703   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3704   aij  = (Mat_SeqAIJ*)(Msub)->data;
3705   ii   = aij->i;
3706   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3707 
3708   /*
3709       m - number of local rows
3710       Ncols - number of columns (same on all processors)
3711       rstart - first row in new global matrix generated
3712   */
3713   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3714 
3715   if (call == MAT_INITIAL_MATRIX) {
3716     /* (4) Create parallel newmat */
3717     PetscMPIInt    rank,size;
3718     PetscInt       csize;
3719 
3720     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3721     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3722 
3723     /*
3724         Determine the number of non-zeros in the diagonal and off-diagonal
3725         portions of the matrix in order to do correct preallocation
3726     */
3727 
3728     /* first get start and end of "diagonal" columns */
3729     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3730     if (csize == PETSC_DECIDE) {
3731       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3732       if (mglobal == Ncols) { /* square matrix */
3733         nlocal = m;
3734       } else {
3735         nlocal = Ncols/size + ((Ncols % size) > rank);
3736       }
3737     } else {
3738       nlocal = csize;
3739     }
3740     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3741     rstart = rend - nlocal;
3742     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3743 
3744     /* next, compute all the lengths */
3745     jj    = aij->j;
3746     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3747     olens = dlens + m;
3748     for (i=0; i<m; i++) {
3749       jend = ii[i+1] - ii[i];
3750       olen = 0;
3751       dlen = 0;
3752       for (j=0; j<jend; j++) {
3753         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3754         else dlen++;
3755         jj++;
3756       }
3757       olens[i] = olen;
3758       dlens[i] = dlen;
3759     }
3760 
3761     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3762     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3763 
3764     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3765     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3766     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3767     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3768     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3769     ierr = PetscFree(dlens);CHKERRQ(ierr);
3770 
3771   } else { /* call == MAT_REUSE_MATRIX */
3772     M    = *newmat;
3773     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3774     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3775     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3776     /*
3777          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3778        rather than the slower MatSetValues().
3779     */
3780     M->was_assembled = PETSC_TRUE;
3781     M->assembled     = PETSC_FALSE;
3782   }
3783 
3784   /* (5) Set values of Msub to *newmat */
3785   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3786   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3787 
3788   jj   = aij->j;
3789   aa   = aij->a;
3790   for (i=0; i<m; i++) {
3791     row = rstart + i;
3792     nz  = ii[i+1] - ii[i];
3793     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3794     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3795     jj += nz; aa += nz;
3796   }
3797   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3798 
3799   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3800   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3801 
3802   ierr = PetscFree(colsub);CHKERRQ(ierr);
3803 
3804   /* save Msub, iscol_sub and iscmap used in processor for next request */
3805   if (call ==  MAT_INITIAL_MATRIX) {
3806     *newmat = M;
3807     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3808     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3809 
3810     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3811     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3812 
3813     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3814     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3815 
3816     if (iscol_local) {
3817       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3818       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3819     }
3820   }
3821   PetscFunctionReturn(0);
3822 }
3823 
3824 /*
3825     Not great since it makes two copies of the submatrix: first a SeqAIJ
3826   copy locally, and then the end result formed by concatenating the local matrices.
3827   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3828 
3829   Note: This requires a sequential iscol with all indices.
3830 */
3831 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3832 {
3833   PetscErrorCode ierr;
3834   PetscMPIInt    rank,size;
3835   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3836   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3837   Mat            M,Mreuse;
3838   MatScalar      *aa,*vwork;
3839   MPI_Comm       comm;
3840   Mat_SeqAIJ     *aij;
3841   PetscBool      colflag,allcolumns=PETSC_FALSE;
3842 
3843   PetscFunctionBegin;
3844   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3845   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3846   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3847 
3848   /* Check for special case: each processor gets entire matrix columns */
3849   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3850   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3851   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3852 
3853   if (call ==  MAT_REUSE_MATRIX) {
3854     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3855     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3856     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3857   } else {
3858     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3859   }
3860 
3861   /*
3862       m - number of local rows
3863       n - number of columns (same on all processors)
3864       rstart - first row in new global matrix generated
3865   */
3866   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3867   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3868   if (call == MAT_INITIAL_MATRIX) {
3869     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3870     ii  = aij->i;
3871     jj  = aij->j;
3872 
3873     /*
3874         Determine the number of non-zeros in the diagonal and off-diagonal
3875         portions of the matrix in order to do correct preallocation
3876     */
3877 
3878     /* first get start and end of "diagonal" columns */
3879     if (csize == PETSC_DECIDE) {
3880       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3881       if (mglobal == n) { /* square matrix */
3882         nlocal = m;
3883       } else {
3884         nlocal = n/size + ((n % size) > rank);
3885       }
3886     } else {
3887       nlocal = csize;
3888     }
3889     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3890     rstart = rend - nlocal;
3891     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3892 
3893     /* next, compute all the lengths */
3894     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3895     olens = dlens + m;
3896     for (i=0; i<m; i++) {
3897       jend = ii[i+1] - ii[i];
3898       olen = 0;
3899       dlen = 0;
3900       for (j=0; j<jend; j++) {
3901         if (*jj < rstart || *jj >= rend) olen++;
3902         else dlen++;
3903         jj++;
3904       }
3905       olens[i] = olen;
3906       dlens[i] = dlen;
3907     }
3908     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3909     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3910     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3911     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3912     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3913     ierr = PetscFree(dlens);CHKERRQ(ierr);
3914   } else {
3915     PetscInt ml,nl;
3916 
3917     M    = *newmat;
3918     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3919     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3920     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3921     /*
3922          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3923        rather than the slower MatSetValues().
3924     */
3925     M->was_assembled = PETSC_TRUE;
3926     M->assembled     = PETSC_FALSE;
3927   }
3928   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3929   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3930   ii   = aij->i;
3931   jj   = aij->j;
3932   aa   = aij->a;
3933   for (i=0; i<m; i++) {
3934     row   = rstart + i;
3935     nz    = ii[i+1] - ii[i];
3936     cwork = jj;     jj += nz;
3937     vwork = aa;     aa += nz;
3938     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3939   }
3940 
3941   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3942   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3943   *newmat = M;
3944 
3945   /* save submatrix used in processor for next request */
3946   if (call ==  MAT_INITIAL_MATRIX) {
3947     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3948     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3949   }
3950   PetscFunctionReturn(0);
3951 }
3952 
3953 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3954 {
3955   PetscInt       m,cstart, cend,j,nnz,i,d;
3956   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3957   const PetscInt *JJ;
3958   PetscErrorCode ierr;
3959   PetscBool      nooffprocentries;
3960 
3961   PetscFunctionBegin;
3962   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3963 
3964   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3965   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3966   m      = B->rmap->n;
3967   cstart = B->cmap->rstart;
3968   cend   = B->cmap->rend;
3969   rstart = B->rmap->rstart;
3970 
3971   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3972 
3973 #if defined(PETSC_USE_DEBUG)
3974   for (i=0; i<m; i++) {
3975     nnz = Ii[i+1]- Ii[i];
3976     JJ  = J + Ii[i];
3977     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3978     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3979     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3980   }
3981 #endif
3982 
3983   for (i=0; i<m; i++) {
3984     nnz     = Ii[i+1]- Ii[i];
3985     JJ      = J + Ii[i];
3986     nnz_max = PetscMax(nnz_max,nnz);
3987     d       = 0;
3988     for (j=0; j<nnz; j++) {
3989       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3990     }
3991     d_nnz[i] = d;
3992     o_nnz[i] = nnz - d;
3993   }
3994   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3995   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3996 
3997   for (i=0; i<m; i++) {
3998     ii   = i + rstart;
3999     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4000   }
4001   nooffprocentries    = B->nooffprocentries;
4002   B->nooffprocentries = PETSC_TRUE;
4003   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4004   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4005   B->nooffprocentries = nooffprocentries;
4006 
4007   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4008   PetscFunctionReturn(0);
4009 }
4010 
4011 /*@
4012    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4013    (the default parallel PETSc format).
4014 
4015    Collective
4016 
4017    Input Parameters:
4018 +  B - the matrix
4019 .  i - the indices into j for the start of each local row (starts with zero)
4020 .  j - the column indices for each local row (starts with zero)
4021 -  v - optional values in the matrix
4022 
4023    Level: developer
4024 
4025    Notes:
4026        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4027      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4028      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4029 
4030        The i and j indices are 0 based, and the values in i are offsets into the local j array.
4031 
4032        The format used for the sparse matrix input is equivalent to a
4033     row-major ordering, i.e. for the following matrix, the input data expected is
4034     as shown
4035 
4036 $        1 0 0
4037 $        2 0 3     P0
4038 $       -------
4039 $        4 5 6     P1
4040 $
4041 $     Process0 [P0]: rows_owned=[0,1]
4042 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4043 $        j =  {0,0,2}  [size = 3]
4044 $        v =  {1,2,3}  [size = 3]
4045 $
4046 $     Process1 [P1]: rows_owned=[2]
4047 $        i =  {0,3}    [size = nrow+1  = 1+1]
4048 $        j =  {0,1,2}  [size = 3]
4049 $        v =  {4,5,6}  [size = 3]
4050 
4051 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4052           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4053 @*/
4054 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4055 {
4056   PetscErrorCode ierr;
4057 
4058   PetscFunctionBegin;
4059   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4060   PetscFunctionReturn(0);
4061 }
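
/*
   Editor's note: a minimal usage sketch for MatMPIAIJSetPreallocationCSR(), guarded out of
   compilation. It feeds in the 3x3 two-process example from the manual page above; the
   i/j/v arrays and the assumption of exactly two MPI ranks are illustrative, not library code.
*/
#if 0
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  Mat               B;
  /* hypothetical CSR triples: P0 owns rows 0-1, P1 owns row 2 */
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
  const PetscScalar v0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
  const PetscScalar v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  /* one call preallocates, inserts the values, and assembles B (see the implementation above) */
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif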
4062 
4063 /*@C
4064    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4065    (the default parallel PETSc format).  For good matrix assembly performance
4066    the user should preallocate the matrix storage by setting the parameters
4067    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4068    performance can be increased by more than a factor of 50.
4069 
4070    Collective
4071 
4072    Input Parameters:
4073 +  B - the matrix
4074 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4075            (same value is used for all local rows)
4076 .  d_nnz - array containing the number of nonzeros in the various rows of the
4077            DIAGONAL portion of the local submatrix (possibly different for each row)
4078            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4079            The size of this array is equal to the number of local rows, i.e 'm'.
4080            For matrices that will be factored, you must leave room for (and set)
4081            the diagonal entry even if it is zero.
4082 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4083            submatrix (same value is used for all local rows).
4084 -  o_nnz - array containing the number of nonzeros in the various rows of the
4085            OFF-DIAGONAL portion of the local submatrix (possibly different for
4086            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4087            structure. The size of this array is equal to the number
4088            of local rows, i.e 'm'.
4089 
4090    If the *_nnz parameter is given then the *_nz parameter is ignored
4091 
4092    The AIJ format (also called the Yale sparse matrix format or
4093    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4094    storage.  The stored row and column indices begin with zero.
4095    See Users-Manual: ch_mat for details.
4096 
4097    The parallel matrix is partitioned such that the first m0 rows belong to
4098    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4099    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4100 
4101    The DIAGONAL portion of the local submatrix of a processor can be defined
4102    as the submatrix which is obtained by extracting the part corresponding to
4103    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4104    first row that belongs to the processor, r2 is the last row belonging to
4105    this processor, and c1-c2 is the range of indices of the local part of a
4106    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4107    common case of a square matrix, the row and column ranges are the same and
4108    the DIAGONAL part is also square. The remaining portion of the local
4109    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4110 
4111    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4112 
4113    You can call MatGetInfo() to get information on how effective the preallocation was;
4114    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4115    You can also run with the option -info and look for messages with the string
4116    malloc in them to see if additional memory allocation was needed.
4117 
4118    Example usage:
4119 
4120    Consider the following 8x8 matrix with 34 non-zero values, that is
4121    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4122    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4123    as follows:
4124 
4125 .vb
4126             1  2  0  |  0  3  0  |  0  4
4127     Proc0   0  5  6  |  7  0  0  |  8  0
4128             9  0 10  | 11  0  0  | 12  0
4129     -------------------------------------
4130            13  0 14  | 15 16 17  |  0  0
4131     Proc1   0 18  0  | 19 20 21  |  0  0
4132             0  0  0  | 22 23  0  | 24  0
4133     -------------------------------------
4134     Proc2  25 26 27  |  0  0 28  | 29  0
4135            30  0  0  | 31 32 33  |  0 34
4136 .ve
4137 
4138    This can be represented as a collection of submatrices as:
4139 
4140 .vb
4141       A B C
4142       D E F
4143       G H I
4144 .ve
4145 
4146    Where the submatrices A,B,C are owned by proc0, D,E,F are
4147    owned by proc1, G,H,I are owned by proc2.
4148 
4149    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4150    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4151    The 'M','N' parameters are 8,8, and have the same values on all procs.
4152 
4153    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4154    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4155    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4156    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4157    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4158    matrix, and [DF] as another SeqAIJ matrix.
4159 
4160    When d_nz, o_nz parameters are specified, d_nz storage elements are
4161    allocated for every row of the local diagonal submatrix, and o_nz
4162    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4163    One way to choose d_nz and o_nz is to use the max nonzeros per local
4164    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4165    In this case, the values of d_nz,o_nz are:
4166 .vb
4167      proc0 : d_nz = 2, o_nz = 2
4168      proc1 : d_nz = 3, o_nz = 2
4169      proc2 : d_nz = 1, o_nz = 4
4170 .ve
4171    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4172    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4173    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4174    34 values.
4175 
4176    When d_nnz, o_nnz parameters are specified, the storage is specified
4177    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4178    In the above case the values for d_nnz,o_nnz are:
4179 .vb
4180      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4181      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4182      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4183 .ve
4184    Here the space allocated is the sum of all the above values, i.e. 34, and
4185    hence pre-allocation is perfect.
4186 
4187    Level: intermediate
4188 
4189 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4190           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4191 @*/
4192 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4193 {
4194   PetscErrorCode ierr;
4195 
4196   PetscFunctionBegin;
4197   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4198   PetscValidType(B,1);
4199   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4200   PetscFunctionReturn(0);
4201 }
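
/*
   Editor's note: a guarded sketch of exact per-row preallocation, using the rank-0 numbers
   from the 8x8 example above (d_nnz = [2,2,2], o_nnz = [2,2,2]). The sizes assume that
   example's 3/3/2 row split, and only the rank-0 fragment is shown.
*/
#if 0
  PetscErrorCode ierr;
  Mat            A;
  const PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);                       /* rank 0: 3 local rows and 3 "diagonal" columns */
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz/o_nz are ignored since d_nnz/o_nnz are given */
  /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
#endif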
4202 
4203 /*@
4204      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4205          rows in standard CSR format.
4206 
4207    Collective
4208 
4209    Input Parameters:
4210 +  comm - MPI communicator
4211 .  m - number of local rows (Cannot be PETSC_DECIDE)
4212 .  n - This value should be the same as the local size used in creating the
4213        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4214        calculated if N is given) For square matrices n is almost always m.
4215 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4216 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4217 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4218 .   j - column indices
4219 -   a - matrix values
4220 
4221    Output Parameter:
4222 .   mat - the matrix
4223 
4224    Level: intermediate
4225 
4226    Notes:
4227        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4228      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4229      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4230 
4231        The i and j indices are 0 based, and the values in i are offsets into the local j array.
4232 
4233        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4234 
4235        The format used for the sparse matrix input is equivalent to a
4236     row-major ordering, i.e. for the following matrix, the input data expected is
4237     as shown
4238 
4239 $        1 0 0
4240 $        2 0 3     P0
4241 $       -------
4242 $        4 5 6     P1
4243 $
4244 $     Process0 [P0]: rows_owned=[0,1]
4245 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4246 $        j =  {0,0,2}  [size = 3]
4247 $        v =  {1,2,3}  [size = 3]
4248 $
4249 $     Process1 [P1]: rows_owned=[2]
4250 $        i =  {0,3}    [size = nrow+1  = 1+1]
4251 $        j =  {0,1,2}  [size = 3]
4252 $        v =  {4,5,6}  [size = 3]
4253 
4254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4255           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4256 @*/
4257 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4258 {
4259   PetscErrorCode ierr;
4260 
4261   PetscFunctionBegin;
4262   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4263   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4264   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4265   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4266   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4267   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4268   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4269   PetscFunctionReturn(0);
4270 }
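
/*
   Editor's note: a guarded sketch of MatCreateMPIAIJWithArrays() for rank 0 of the two-rank
   3x3 example above; the i0/j0/v0 arrays are the manual page's hypothetical P0 data.
*/
#if 0
  PetscErrorCode    ierr;
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
  const PetscScalar v0[] = {1,2,3};
  Mat               A;

  /* 2 local rows, local column size left to PETSc, 3 global columns */
  ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i0,j0,v0,&A);CHKERRQ(ierr);
#endif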
4271 
4272 /*@
4273      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4274          rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical.
4275 
4276    Collective
4277 
4278    Input Parameters:
4279 +  mat - the matrix
4280 .  m - number of local rows (Cannot be PETSC_DECIDE)
4281 .  n - This value should be the same as the local size used in creating the
4282        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4283        calculated if N is given) For square matrices n is almost always m.
4284 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4285 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4286 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4287 .  J - column indices
4288 -  v - matrix values
4289 
4290    Level: intermediate
4291 
4292 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4293           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4294 @*/
4295 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4296 {
4297   PetscErrorCode ierr;
4298   PetscInt       cstart,nnz,i,j;
4299   PetscInt       *ld;
4300   PetscBool      nooffprocentries;
4301   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4302   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4303   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4304   const PetscInt *Adi = Ad->i;
4305   PetscInt       ldi,Iii,md;
4306 
4307   PetscFunctionBegin;
4308   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0");
4309   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4310   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4311   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4312 
4313   cstart = mat->cmap->rstart;
4314   if (!Aij->ld) {
4315     /* count number of entries below block diagonal */
4316     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4317     Aij->ld = ld;
4318     for (i=0; i<m; i++) {
4319       nnz   = Ii[i+1] - Ii[i];
4320       j     = 0;
4321       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] so we never read past the row */
4322       J    += nnz;
4323       ld[i] = j;
4324     }
4325   } else {
4326     ld = Aij->ld;
4327   }
4328 
4329   for (i=0; i<m; i++) {
4330     nnz  = Ii[i+1]- Ii[i];
4331     Iii  = Ii[i];
4332     ldi  = ld[i];
4333     md   = Adi[i+1]-Adi[i];
4334     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4335     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4336     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4337     ad  += md;
4338     ao  += nnz - md;
4339   }
4340   nooffprocentries      = mat->nooffprocentries;
4341   mat->nooffprocentries = PETSC_TRUE;
4342   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4343   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4344   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4345   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4346   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4347   mat->nooffprocentries = nooffprocentries;
4348   PetscFunctionReturn(0);
4349 }
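
/*
   Editor's note: a guarded sketch of refreshing the numerical values, continuing the
   hypothetical rank-0 fragment shown after MatCreateMPIAIJWithArrays() above (A, i0, j0).
   The i0/j0 arrays must describe exactly the nonzero pattern used at creation.
*/
#if 0
  const PetscScalar vnew[] = {10,20,30}; /* hypothetical replacement values */
  PetscInt          nloc;

  ierr = MatGetLocalSize(A,NULL,&nloc);CHKERRQ(ierr); /* n must equal the existing local column size */
  ierr = MatUpdateMPIAIJWithArrays(A,2,nloc,PETSC_DETERMINE,3,i0,j0,vnew);CHKERRQ(ierr);
#endif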
4350 
4351 /*@C
4352    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4353    (the default parallel PETSc format).  For good matrix assembly performance
4354    the user should preallocate the matrix storage by setting the parameters
4355    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4356    performance can be increased by more than a factor of 50.
4357 
4358    Collective
4359 
4360    Input Parameters:
4361 +  comm - MPI communicator
4362 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4363            This value should be the same as the local size used in creating the
4364            y vector for the matrix-vector product y = Ax.
4365 .  n - This value should be the same as the local size used in creating the
4366        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4367        calculated if N is given) For square matrices n is almost always m.
4368 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4369 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4370 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4371            (same value is used for all local rows)
4372 .  d_nnz - array containing the number of nonzeros in the various rows of the
4373            DIAGONAL portion of the local submatrix (possibly different for each row)
4374            or NULL, if d_nz is used to specify the nonzero structure.
4375            The size of this array is equal to the number of local rows, i.e 'm'.
4376 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4377            submatrix (same value is used for all local rows).
4378 -  o_nnz - array containing the number of nonzeros in the various rows of the
4379            OFF-DIAGONAL portion of the local submatrix (possibly different for
4380            each row) or NULL, if o_nz is used to specify the nonzero
4381            structure. The size of this array is equal to the number
4382            of local rows, i.e 'm'.
4383 
4384    Output Parameter:
4385 .  A - the matrix
4386 
4387    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4388    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4389    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4390 
4391    Notes:
4392    If the *_nnz parameter is given then the *_nz parameter is ignored
4393 
4394    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4395    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4396    storage requirements for this matrix.
4397 
4398    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4399    processor then it must be used on all processors that share the object for
4400    that argument.
4401 
4402    The user MUST specify either the local or global matrix dimensions
4403    (possibly both).
4404 
4405    The parallel matrix is partitioned across processors such that the
4406    first m0 rows belong to process 0, the next m1 rows belong to
4407    process 1, the next m2 rows belong to process 2 etc., where
4408    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4409    values corresponding to an [m x N] submatrix.
4410 
4411    The columns are logically partitioned with the n0 columns belonging
4412    to 0th partition, the next n1 columns belonging to the next
4413    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4414 
4415    The DIAGONAL portion of the local submatrix on any given processor
4416    is the submatrix corresponding to the rows m and columns n owned by
4417    the given processor, i.e. the diagonal matrix on
4418    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4419    etc. The remaining portion of the local submatrix [m x (N-n)]
4420    constitutes the OFF-DIAGONAL portion. The example below better
4421    illustrates this concept.
4422 
4423    For a square global matrix we define each processor's diagonal portion
4424    to be its local rows and the corresponding columns (a square submatrix);
4425    each processor's off-diagonal portion encompasses the remainder of the
4426    local matrix (a rectangular submatrix).
4427 
4428    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4429 
4430    When calling this routine with a single process communicator, a matrix of
4431    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4432    type of communicator, use the construction mechanism
4433 .vb
4434      MatCreate(...,&A);
4435      MatSetType(A,MATMPIAIJ);
4436      MatSetSizes(A, m,n,M,N);
4437      MatMPIAIJSetPreallocation(A,...);
4438 .ve
4441 
4442    By default, this format uses inodes (identical nodes) when possible.
4443    We search for consecutive rows with the same nonzero structure, thereby
4444    reusing matrix information to achieve increased efficiency.
4445 
4446    Options Database Keys:
4447 +  -mat_no_inode  - Do not use inodes
4448 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4449 
4450 
4451 
4452    Example usage:
4453 
4454    Consider the following 8x8 matrix with 34 non-zero values, that is
4455    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4456    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4457    as follows:
4458 
4459 .vb
4460             1  2  0  |  0  3  0  |  0  4
4461     Proc0   0  5  6  |  7  0  0  |  8  0
4462             9  0 10  | 11  0  0  | 12  0
4463     -------------------------------------
4464            13  0 14  | 15 16 17  |  0  0
4465     Proc1   0 18  0  | 19 20 21  |  0  0
4466             0  0  0  | 22 23  0  | 24  0
4467     -------------------------------------
4468     Proc2  25 26 27  |  0  0 28  | 29  0
4469            30  0  0  | 31 32 33  |  0 34
4470 .ve
4471 
4472    This can be represented as a collection of submatrices as:
4473 
4474 .vb
4475       A B C
4476       D E F
4477       G H I
4478 .ve
4479 
4480    Where the submatrices A,B,C are owned by proc0, D,E,F are
4481    owned by proc1, G,H,I are owned by proc2.
4482 
4483    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4484    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4485    The 'M','N' parameters are 8,8, and have the same values on all procs.
4486 
4487    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4488    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4489    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4490    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4491    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4492    matrix, and [DF] as another SeqAIJ matrix.
4493 
4494    When d_nz, o_nz parameters are specified, d_nz storage elements are
4495    allocated for every row of the local diagonal submatrix, and o_nz
4496    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4497    One way to choose d_nz and o_nz is to use the max nonzeros per local
4498    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4499    In this case, the values of d_nz,o_nz are
4500 .vb
4501      proc0 : d_nz = 2, o_nz = 2
4502      proc1 : d_nz = 3, o_nz = 2
4503      proc2 : d_nz = 1, o_nz = 4
4504 .ve
4505    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4506    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4507    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4508    34 values.
4509 
4510    When d_nnz, o_nnz parameters are specified, the storage is specified
4511    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4512    In the above case the values for d_nnz,o_nnz are
4513 .vb
4514      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4515      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4516      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4517 .ve
4518    Here the space allocated is the sum of all the above values, i.e. 34, and
4519    hence pre-allocation is perfect.
4520 
4521    Level: intermediate
4522 
4523 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4524           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4525 @*/
4526 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4527 {
4528   PetscErrorCode ierr;
4529   PetscMPIInt    size;
4530 
4531   PetscFunctionBegin;
4532   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4533   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4535   if (size > 1) {
4536     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4537     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4538   } else {
4539     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4540     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4541   }
4542   PetscFunctionReturn(0);
4543 }
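
/*
   Editor's note: a guarded sketch of the two construction styles. The sizes reuse the 8x8
   example above, as seen from the rank owning 3 rows; the d_nz = 3, o_nz = 2 upper bounds
   are that example's per-row maxima for proc1, not a general recipe.
*/
#if 0
  PetscErrorCode ierr;
  Mat            A;

  /* one-shot creation */
  ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,3,NULL,2,NULL,&A);CHKERRQ(ierr);

  /* or the recommended MatCreate()/MatSetType()/preallocation paradigm */
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
#endif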
4544 
4545 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4546 {
4547   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4548   PetscBool      flg;
4549   PetscErrorCode ierr;
4550 
4551   PetscFunctionBegin;
4552   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4553   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4554   if (Ad)     *Ad     = a->A;
4555   if (Ao)     *Ao     = a->B;
4556   if (colmap) *colmap = a->garray;
4557   PetscFunctionReturn(0);
4558 }
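
/*
   Editor's note: a guarded sketch of MatMPIAIJGetSeqAIJ(). Ad and Ao are the stored diagonal
   and off-diagonal SeqAIJ blocks (borrowed references, do not destroy them), and colmap[k] is
   the global column of local column k of Ao. A is an assumed existing MATMPIAIJ matrix.
*/
#if 0
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  const PetscInt *colmap;

  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
#endif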
4559 
4560 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4561 {
4562   PetscErrorCode ierr;
4563   PetscInt       m,N,i,rstart,nnz,Ii;
4564   PetscInt       *indx;
4565   PetscScalar    *values;
4566 
4567   PetscFunctionBegin;
4568   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4569   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4570     PetscInt       *dnz,*onz,sum,bs,cbs;
4571 
4572     if (n == PETSC_DECIDE) {
4573       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4574     }
4575     /* Check sum(n) = N */
4576     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4577     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4578 
4579     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4580     rstart -= m;
4581 
4582     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4583     for (i=0; i<m; i++) {
4584       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4585       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4586       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4587     }
4588 
4589     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4590     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4591     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4592     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4593     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4594     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4595     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4596     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4597   }
4598 
4599   /* numeric phase */
4600   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4601   for (i=0; i<m; i++) {
4602     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4603     Ii   = i + rstart;
4604     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4605     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4606   }
4607   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4608   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4609   PetscFunctionReturn(0);
4610 }
4611 
4612 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4613 {
4614   PetscErrorCode    ierr;
4615   PetscMPIInt       rank;
4616   PetscInt          m,N,i,rstart,nnz;
4617   size_t            len;
4618   const PetscInt    *indx;
4619   PetscViewer       out;
4620   char              *name;
4621   Mat               B;
4622   const PetscScalar *values;
4623 
4624   PetscFunctionBegin;
4625   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4626   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4627   /* Should this be the type of the diagonal block of A? */
4628   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4629   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4630   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4631   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4632   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4633   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4634   for (i=0; i<m; i++) {
4635     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4636     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4637     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4638   }
4639   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4640   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4641 
4642   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4643   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4644   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* room for '.', up to 10 rank digits, and the terminating NUL */
4645   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
4646   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4647   ierr = PetscFree(name);CHKERRQ(ierr);
4648   ierr = MatView(B,out);CHKERRQ(ierr);
4649   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4650   ierr = MatDestroy(&B);CHKERRQ(ierr);
4651   PetscFunctionReturn(0);
4652 }
4653 
4654 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4655 {
4656   PetscErrorCode      ierr;
4657   Mat_Merge_SeqsToMPI *merge;
4658   PetscContainer      container;
4659 
4660   PetscFunctionBegin;
4661   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4662   if (container) {
4663     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4666     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4667     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4669     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4670     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4671     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4672     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4673     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4674     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4675     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4676     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4677     ierr = PetscFree(merge);CHKERRQ(ierr);
4678     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4679   }
4680   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4681   PetscFunctionReturn(0);
4682 }
4683 
4684 #include <../src/mat/utils/freespace.h>
4685 #include <petscbt.h>
4686 
4687 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4688 {
4689   PetscErrorCode      ierr;
4690   MPI_Comm            comm;
4691   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4692   PetscMPIInt         size,rank,taga,*len_s;
4693   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4694   PetscInt            proc,m;
4695   PetscInt            **buf_ri,**buf_rj;
4696   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4697   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4698   MPI_Request         *s_waits,*r_waits;
4699   MPI_Status          *status;
4700   MatScalar           *aa=a->a;
4701   MatScalar           **abuf_r,*ba_i;
4702   Mat_Merge_SeqsToMPI *merge;
4703   PetscContainer      container;
4704 
4705   PetscFunctionBegin;
4706   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4707   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4708 
4709   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4711 
4712   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4713   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4714 
4715   bi     = merge->bi;
4716   bj     = merge->bj;
4717   buf_ri = merge->buf_ri;
4718   buf_rj = merge->buf_rj;
4719 
4720   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4721   owners = merge->rowmap->range;
4722   len_s  = merge->len_s;
4723 
4724   /* send and recv matrix values */
4725   /*-----------------------------*/
4726   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4727   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4728 
4729   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4730   for (proc=0,k=0; proc<size; proc++) {
4731     if (!len_s[proc]) continue;
4732     i    = owners[proc];
4733     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4734     k++;
4735   }
4736 
4737   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4738   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4739   ierr = PetscFree(status);CHKERRQ(ierr);
4740 
4741   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4742   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4743 
4744   /* insert mat values of mpimat */
4745   /*----------------------------*/
4746   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4747   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4748 
4749   for (k=0; k<merge->nrecv; k++) {
4750     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4751     nrows       = *(buf_ri_k[k]);
4752     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4753     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4754   }
4755 
4756   /* set values of ba */
4757   m = merge->rowmap->n;
4758   for (i=0; i<m; i++) {
4759     arow = owners[rank] + i;
4760     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4761     bnzi = bi[i+1] - bi[i];
4762     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4763 
4764     /* add local non-zero vals of this proc's seqmat into ba */
4765     anzi   = ai[arow+1] - ai[arow];
4766     aj     = a->j + ai[arow];
4767     aa     = a->a + ai[arow];
4768     nextaj = 0;
4769     for (j=0; nextaj<anzi; j++) {
4770       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4771         ba_i[j] += aa[nextaj++];
4772       }
4773     }
4774 
4775     /* add received vals into ba */
4776     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4777       /* i-th row */
4778       if (i == *nextrow[k]) {
4779         anzi   = *(nextai[k]+1) - *nextai[k];
4780         aj     = buf_rj[k] + *(nextai[k]);
4781         aa     = abuf_r[k] + *(nextai[k]);
4782         nextaj = 0;
4783         for (j=0; nextaj<anzi; j++) {
4784           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4785             ba_i[j] += aa[nextaj++];
4786           }
4787         }
4788         nextrow[k]++; nextai[k]++;
4789       }
4790     }
4791     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4792   }
4793   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4794   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4795 
4796   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4797   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4798   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4799   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4800   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4801   PetscFunctionReturn(0);
4802 }
4803 
4804 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4805 {
4806   PetscErrorCode      ierr;
4807   Mat                 B_mpi;
4808   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4809   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4810   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4811   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4812   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4813   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4814   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4815   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4816   MPI_Status          *status;
4817   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4818   PetscBT             lnkbt;
4819   Mat_Merge_SeqsToMPI *merge;
4820   PetscContainer      container;
4821 
4822   PetscFunctionBegin;
4823   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4824 
4825   /* make sure it is a PETSc comm */
4826   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4827   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4828   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4829 
4830   ierr = PetscNew(&merge);CHKERRQ(ierr);
4831   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4832 
4833   /* determine row ownership */
4834   /*---------------------------------------------------------*/
4835   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4836   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4837   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4838   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4839   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4840   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4841   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4842 
4843   m      = merge->rowmap->n;
4844   owners = merge->rowmap->range;
4845 
4846   /* determine the number of messages to send, their lengths */
4847   /*---------------------------------------------------------*/
4848   len_s = merge->len_s;
4849 
4850   len          = 0; /* length of buf_si[] */
4851   merge->nsend = 0;
4852   for (proc=0; proc<size; proc++) {
4853     len_si[proc] = 0;
4854     if (proc == rank) {
4855       len_s[proc] = 0;
4856     } else {
4857       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4858       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4859     }
4860     if (len_s[proc]) {
4861       merge->nsend++;
4862       nrows = 0;
4863       for (i=owners[proc]; i<owners[proc+1]; i++) {
4864         if (ai[i+1] > ai[i]) nrows++;
4865       }
4866       len_si[proc] = 2*(nrows+1);
4867       len         += len_si[proc];
4868     }
4869   }
4870 
4871   /* determine the number and length of messages to receive for ij-structure */
4872   /*-------------------------------------------------------------------------*/
4873   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4874   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4875 
4876   /* post the Irecv of j-structure */
4877   /*-------------------------------*/
4878   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4879   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4880 
4881   /* post the Isend of j-structure */
4882   /*--------------------------------*/
4883   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4884 
4885   for (proc=0, k=0; proc<size; proc++) {
4886     if (!len_s[proc]) continue;
4887     i    = owners[proc];
4888     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4889     k++;
4890   }
4891 
4892   /* receives and sends of j-structure are complete */
4893   /*------------------------------------------------*/
4894   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4895   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4896 
4897   /* send and recv i-structure */
4898   /*---------------------------*/
4899   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4900   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4901 
4902   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4903   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4904   for (proc=0,k=0; proc<size; proc++) {
4905     if (!len_s[proc]) continue;
4906     /* form outgoing message for i-structure:
4907          buf_si[0]:                 nrows to be sent
4908                [1:nrows]:           row index (global)
4909                [nrows+1:2*nrows+1]: i-structure index
4910     */
4911     /*-------------------------------------------*/
4912     nrows       = len_si[proc]/2 - 1;
4913     buf_si_i    = buf_si + nrows+1;
4914     buf_si[0]   = nrows;
4915     buf_si_i[0] = 0;
4916     nrows       = 0;
4917     for (i=owners[proc]; i<owners[proc+1]; i++) {
4918       anzi = ai[i+1] - ai[i];
4919       if (anzi) {
4920         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4921         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4922         nrows++;
4923       }
4924     }
4925     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4926     k++;
4927     buf_si += len_si[proc];
4928   }
4929 
4930   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4931   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4932 
4933   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4934   for (i=0; i<merge->nrecv; i++) {
4935     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4936   }
4937 
4938   ierr = PetscFree(len_si);CHKERRQ(ierr);
4939   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4940   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4941   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4942   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4943   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4944   ierr = PetscFree(status);CHKERRQ(ierr);
4945 
4946   /* compute a local seq matrix in each processor */
4947   /*----------------------------------------------*/
4948   /* allocate bi array and free space for accumulating nonzero column info */
4949   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4950   bi[0] = 0;
4951 
4952   /* create and initialize a linked list */
4953   nlnk = N+1;
4954   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4955 
4956   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4957   len  = ai[owners[rank+1]] - ai[owners[rank]];
4958   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4959 
4960   current_space = free_space;
4961 
4962   /* determine symbolic info for each local row */
4963   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4964 
4965   for (k=0; k<merge->nrecv; k++) {
4966     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4967     nrows       = *buf_ri_k[k];
4968     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4969     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4970   }
4971 
4972   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4973   len  = 0;
4974   for (i=0; i<m; i++) {
4975     bnzi = 0;
4976     /* add local non-zero cols of this proc's seqmat into lnk */
4977     arow  = owners[rank] + i;
4978     anzi  = ai[arow+1] - ai[arow];
4979     aj    = a->j + ai[arow];
4980     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4981     bnzi += nlnk;
4982     /* add received col data into lnk */
4983     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4984       if (i == *nextrow[k]) { /* i-th row */
4985         anzi  = *(nextai[k]+1) - *nextai[k];
4986         aj    = buf_rj[k] + *nextai[k];
4987         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4988         bnzi += nlnk;
4989         nextrow[k]++; nextai[k]++;
4990       }
4991     }
4992     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4993 
4994     /* if free space is not available, make more free space */
4995     if (current_space->local_remaining<bnzi) {
4996       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4997       nspacedouble++;
4998     }
4999     /* copy data into free space, then initialize lnk */
5000     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5001     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5002 
5003     current_space->array           += bnzi;
5004     current_space->local_used      += bnzi;
5005     current_space->local_remaining -= bnzi;
5006 
5007     bi[i+1] = bi[i] + bnzi;
5008   }
5009 
5010   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5011 
5012   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5013   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5014   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5015 
5016   /* create symbolic parallel matrix B_mpi */
5017   /*---------------------------------------*/
5018   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5019   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5020   if (n==PETSC_DECIDE) {
5021     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5022   } else {
5023     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5024   }
5025   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5026   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5027   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5028   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5029   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5030 
5031   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5032   B_mpi->assembled    = PETSC_FALSE;
5033   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5034   merge->bi           = bi;
5035   merge->bj           = bj;
5036   merge->buf_ri       = buf_ri;
5037   merge->buf_rj       = buf_rj;
5038   merge->coi          = NULL;
5039   merge->coj          = NULL;
5040   merge->owners_co    = NULL;
5041 
5042   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5043 
5044   /* attach the supporting struct to B_mpi for reuse */
5045   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5046   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5047   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5048   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5049   *mpimat = B_mpi;
5050 
5051   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5052   PetscFunctionReturn(0);
5053 }
5054 
5055 /*@C
5056       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5057                  matrices from each processor
5058 
5059     Collective
5060 
5061    Input Parameters:
5062 +    comm - the communicator the parallel matrix will live on
5063 .    seqmat - the input sequential matrix
5064 .    m - number of local rows (or PETSC_DECIDE)
5065 .    n - number of local columns (or PETSC_DECIDE)
5066 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5067 
5068    Output Parameter:
5069 .    mpimat - the parallel matrix generated
5070 
5071     Level: advanced
5072 
5073    Notes:
5074      The dimensions of the sequential matrix in each processor MUST be the same.
5075      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5076      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
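
     A minimal sketch of the calling sequence (error checking omitted; seqmat is
     an assembled MATSEQAIJ with the same dimensions on every process):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve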
5077 @*/
5078 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5079 {
5080   PetscErrorCode ierr;
5081   PetscMPIInt    size;
5082 
5083   PetscFunctionBegin;
5084   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5085   if (size == 1) {
5086     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5087     if (scall == MAT_INITIAL_MATRIX) {
5088       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5089     } else {
5090       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5091     }
5092     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5093     PetscFunctionReturn(0);
5094   }
5095   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5096   if (scall == MAT_INITIAL_MATRIX) {
5097     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5098   }
5099   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5100   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5101   PetscFunctionReturn(0);
5102 }
5103 
5104 /*@
5105      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5106           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5107           with MatGetSize()
5108 
5109     Not Collective
5110 
5111    Input Parameters:
5112 +    A - the matrix
5113 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5114 
5115    Output Parameter:
5116 .    A_loc - the local sequential matrix generated
5117 
5118     Level: developer
5119 
5120    Notes:
5121      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5122      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5123      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5124      modify the values of the returned A_loc.
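
     A minimal sketch (error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc); /* refresh the values after A has changed */
     MatDestroy(&A_loc);
.ve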
5125 
5126 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5127 
5128 @*/
5129 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5130 {
5131   PetscErrorCode ierr;
5132   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5133   Mat_SeqAIJ     *mat,*a,*b;
5134   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5135   MatScalar      *aa,*ba,*cam;
5136   PetscScalar    *ca;
5137   PetscMPIInt    size;
5138   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5139   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5140   PetscBool      match;
5141 
5142   PetscFunctionBegin;
5143   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5144   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5145   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5146   if (size == 1) {
5147     if (scall == MAT_INITIAL_MATRIX) {
5148       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5149       *A_loc = mpimat->A;
5150     } else if (scall == MAT_REUSE_MATRIX) {
5151       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5152     }
5153     PetscFunctionReturn(0);
5154   }
5155 
5156   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5157   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5158   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5159   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5160   aa = a->a; ba = b->a;
5161   if (scall == MAT_INITIAL_MATRIX) {
5162     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5163     ci[0] = 0;
5164     for (i=0; i<am; i++) {
5165       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5166     }
5167     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5168     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5169     k    = 0;
5170     for (i=0; i<am; i++) {
5171       ncols_o = bi[i+1] - bi[i];
5172       ncols_d = ai[i+1] - ai[i];
5173       /* off-diagonal portion of A */
5174       for (jo=0; jo<ncols_o; jo++) {
5175         col = cmap[*bj];
5176         if (col >= cstart) break;
5177         cj[k]   = col; bj++;
5178         ca[k++] = *ba++;
5179       }
5180       /* diagonal portion of A */
5181       for (j=0; j<ncols_d; j++) {
5182         cj[k]   = cstart + *aj++;
5183         ca[k++] = *aa++;
5184       }
5185       /* off-diagonal portion of A */
5186       for (j=jo; j<ncols_o; j++) {
5187         cj[k]   = cmap[*bj++];
5188         ca[k++] = *ba++;
5189       }
5190     }
5191     /* put together the new matrix */
5192     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5193     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5194     /* Since these are PETSc arrays, change flags to free them as necessary. */
5195     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5196     mat->free_a  = PETSC_TRUE;
5197     mat->free_ij = PETSC_TRUE;
5198     mat->nonew   = 0;
5199   } else if (scall == MAT_REUSE_MATRIX) {
5200     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5201     ci = mat->i; cj = mat->j; cam = mat->a;
5202     for (i=0; i<am; i++) {
5203       /* off-diagonal portion of A */
5204       ncols_o = bi[i+1] - bi[i];
5205       for (jo=0; jo<ncols_o; jo++) {
5206         col = cmap[*bj];
5207         if (col >= cstart) break;
5208         *cam++ = *ba++; bj++;
5209       }
5210       /* diagonal portion of A */
5211       ncols_d = ai[i+1] - ai[i];
5212       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5213       /* off-diagonal portion of A */
5214       for (j=jo; j<ncols_o; j++) {
5215         *cam++ = *ba++; bj++;
5216       }
5217     }
5218   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5219   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5220   PetscFunctionReturn(0);
5221 }
5222 
5223 /*@C
5224      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5225 
5226     Not Collective
5227 
5228    Input Parameters:
5229 +    A - the matrix
5230 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5231 -    row, col - index sets of rows and columns to extract (or NULL)
5232 
5233    Output Parameter:
5234 .    A_loc - the local sequential matrix generated
5235 
5236     Level: developer
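
   A minimal sketch (error checking omitted; passing NULL for row and col selects
   all local rows and all locally nonzero columns):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve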
5237 
5238 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5239 
5240 @*/
5241 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5242 {
5243   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5244   PetscErrorCode ierr;
5245   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5246   IS             isrowa,iscola;
5247   Mat            *aloc;
5248   PetscBool      match;
5249 
5250   PetscFunctionBegin;
5251   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5252   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5253   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5254   if (!row) {
5255     start = A->rmap->rstart; end = A->rmap->rend;
5256     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5257   } else {
5258     isrowa = *row;
5259   }
5260   if (!col) {
5261     start = A->cmap->rstart;
5262     cmap  = a->garray;
5263     nzA   = a->A->cmap->n;
5264     nzB   = a->B->cmap->n;
5265     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5266     ncols = 0;
5267     for (i=0; i<nzB; i++) {
5268       if (cmap[i] < start) idx[ncols++] = cmap[i];
5269       else break;
5270     }
5271     imark = i;
5272     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5273     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5274     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5275   } else {
5276     iscola = *col;
5277   }
5278   if (scall != MAT_INITIAL_MATRIX) {
5279     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5280     aloc[0] = *A_loc;
5281   }
5282   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5283   if (!col) { /* attach global id of condensed columns */
5284     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5285   }
5286   *A_loc = aloc[0];
5287   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5288   if (!row) {
5289     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5290   }
5291   if (!col) {
5292     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5293   }
5294   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5295   PetscFunctionReturn(0);
5296 }
5297 
5298 /*
5299  * Destroy a matrix that may have PetscSF communication objects composed with it.
5300  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5301  * */
5302 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5303 {
5304   PetscSF          sf,osf;
5305   IS               map;
5306   PetscErrorCode   ierr;
5307 
5308   PetscFunctionBegin;
5309   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5310   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5311   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5312   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5313   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5314   ierr = ISDestroy(&map);CHKERRQ(ierr);
5315   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5316   PetscFunctionReturn(0);
5317 }
5318 
5319 /*
5320  * Create a sequential AIJ matrix based on row indices; an entire row is extracted once its index is matched.
5321  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5322  * on a global size.
5323  * */
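/*
 * A minimal usage sketch (the row indices are hypothetical; error checking shortened):
 * gather three rows of the parallel matrix P, possibly owned by other ranks, into a
 * sequential matrix on this rank:
 *
 *   IS       rows;
 *   Mat      P_oth = NULL;
 *   PetscInt idx[3] = {0, 5, 17};
 *   ierr = ISCreateGeneral(PETSC_COMM_SELF,3,idx,PETSC_COPY_VALUES,&rows);CHKERRQ(ierr);
 *   ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,&P_oth);CHKERRQ(ierr);
 *   ierr = ISDestroy(&rows);CHKERRQ(ierr);
 * */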
5324 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5325 {
5326   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5327   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5328   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5329   PetscMPIInt              owner;
5330   PetscSFNode              *iremote,*oiremote;
5331   const PetscInt           *lrowindices;
5332   PetscErrorCode           ierr;
5333   PetscSF                  sf,osf;
5334   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5335   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5336   MPI_Comm                 comm;
5337   ISLocalToGlobalMapping   mapping;
5338 
5339   PetscFunctionBegin;
5340   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5341   /* plocalsize is the number of roots
5342    * nrows is the number of leaves
5343    * */
5344   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5345   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5346   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5347   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5348   for (i=0;i<nrows;i++) {
5349     /* Find a remote index and an owner for a row
5350      * The row could be local or remote
5351      * */
5352     owner = 0;
5353     lidx  = 0;
5354     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5355     iremote[i].index = lidx;
5356     iremote[i].rank  = owner;
5357   }
5358   /* Create SF to communicate how many nonzero columns for each row */
5359   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5360   /* SF will figure out the number of nonzero columns for each row, and their
5361    * offsets
5362    * */
5363   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5364   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5365   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5366 
5367   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5368   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5369   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5370   roffsets[0] = 0;
5371   roffsets[1] = 0;
5372   for (i=0;i<plocalsize;i++) {
5373     /* diag */
5374     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5375     /* off diag */
5376     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5377     /* compute offsets so that we know the relative location of each row */
5378     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5379     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5380   }
5381   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5382   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5383   /* 'r' means root, and 'l' means leaf */
5384   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5385   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5386   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5387   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5388   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5389   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5390   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5391   dntotalcols = 0;
5392   ontotalcols = 0;
5393   ncol = 0;
5394   for (i=0;i<nrows;i++) {
5395     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5396     ncol = PetscMax(pnnz[i],ncol);
5397     /* diag */
5398     dntotalcols += nlcols[i*2+0];
5399     /* off diag */
5400     ontotalcols += nlcols[i*2+1];
5401   }
5402   /* We do not need to figure out the exact number of columns
5403    * since all the calculations will be done by going through the raw data
5404    * */
5405   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5406   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5407   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5408   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5409   /* diag */
5410   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5411   /* off diag */
5412   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5413   /* diag */
5414   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5415   /* off diag */
5416   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5417   dntotalcols = 0;
5418   ontotalcols = 0;
5419   ntotalcols  = 0;
5420   for (i=0;i<nrows;i++) {
5421     owner = 0;
5422     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5423     /* Set iremote for diag matrix */
5424     for (j=0;j<nlcols[i*2+0];j++) {
5425       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5426       iremote[dntotalcols].rank    = owner;
5427       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5428       ilocal[dntotalcols++]        = ntotalcols++;
5429     }
5430     /* off diag */
5431     for (j=0;j<nlcols[i*2+1];j++) {
5432       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5433       oiremote[ontotalcols].rank    = owner;
5434       oilocal[ontotalcols++]        = ntotalcols++;
5435     }
5436   }
5437   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5438   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5439   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5440   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5441   /* P serves as the roots and P_oth as the leaves
5442    * Diag matrix
5443    * */
5444   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5445   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5446   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5447 
5448   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5449   /* Off diag */
5450   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5451   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5452   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5453   /* We operate on the matrix internal data to save memory */
5454   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5455   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5456   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5457   /* Convert to global indices for diag matrix */
5458   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5459   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5460   /* We want P_oth to store global indices */
5461   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5462   /* Use memory scalable approach */
5463   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5464   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5465   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5466   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5467   /* Convert back to local indices */
5468   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5469   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5470   nout = 0;
5471   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5472   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5473   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5474   /* Exchange values */
5475   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5476   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5477   /* Stop PETSc from shrinking memory */
5478   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5479   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5480   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5481   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5482   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5483   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5484   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5485   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5486   PetscFunctionReturn(0);
5487 }
5488 
5489 /*
5490  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5491  * This supports MPIAIJ and MAIJ
5492  * */
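/*
 * A usage sketch (error checking omitted): with dof = 1 this gathers the rows of P that
 * match the nonzero off-diagonal columns of the local A; a later MAT_REUSE_MATRIX call
 * only refreshes the numerical values through the attached PetscSF objects:
 *
 *   Mat P_oth = NULL;
 *   MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_INITIAL_MATRIX,&P_oth);
 *   MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_REUSE_MATRIX,&P_oth);
 *   MatDestroy(&P_oth);
 * */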
5493 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5494 {
5495   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5496   Mat_SeqAIJ            *p_oth;
5497   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5498   IS                    rows,map;
5499   PetscHMapI            hamp;
5500   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5501   MPI_Comm              comm;
5502   PetscSF               sf,osf;
5503   PetscBool             has;
5504   PetscErrorCode        ierr;
5505 
5506   PetscFunctionBegin;
5507   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5508   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5509   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5510    *  and then create a submatrix (that often is an overlapping matrix)
5511    * */
5512   if (reuse==MAT_INITIAL_MATRIX) {
5513     /* Use a hash table to figure out unique keys */
5514     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5515     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5516     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5517     count = 0;
5518     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5519     for (i=0;i<a->B->cmap->n;i++) {
5520       key  = a->garray[i]/dof;
5521       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5522       if (!has) {
5523         mapping[i] = count;
5524         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5525       } else {
5526         /* Current 'i' has the same key as the previous one */
5527         mapping[i] = count-1;
5528       }
5529     }
5530     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5531     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5532     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5533     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5534     off = 0;
5535     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5536     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5537     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5538     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5539     /* In case the matrix was already created but the user wants to recreate the matrix */
5540     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5541     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5542     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5543     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5544   } else if (reuse==MAT_REUSE_MATRIX) {
5545     /* If the matrix was already created, we simply update the values using the SF objects
5546      * that were attached to the matrix earlier.
5547      *  */
5548     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5549     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5550     if (!sf || !osf) {
5551       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5552     }
5553     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5554     /* Update values in place */
5555     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5556     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5557     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5558     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5559   } else {
5560     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5561   }
5562   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5563   PetscFunctionReturn(0);
5564 }
5565 
5566 /*@C
5567     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5568 
5569     Collective on Mat
5570 
5571    Input Parameters:
5572 +    A,B - the matrices in mpiaij format
5573 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5574 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5575 
5576    Output Parameters:
5577 +    rowb, colb - index sets of the rows and columns of B that were extracted (created when NULL was passed in)
5578 -    B_seq - the sequential matrix generated
5579 
5580     Level: developer
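
   A minimal sketch (error checking omitted); the index sets returned for
   MAT_INITIAL_MATRIX must be kept by the caller for MAT_REUSE_MATRIX and
   destroyed afterwards:
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
     ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&B_seq);
.ve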
5581 
5582 @*/
5583 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5584 {
5585   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5586   PetscErrorCode ierr;
5587   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5588   IS             isrowb,iscolb;
5589   Mat            *bseq=NULL;
5590 
5591   PetscFunctionBegin;
5592   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5593     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5594   }
5595   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5596 
5597   if (scall == MAT_INITIAL_MATRIX) {
5598     start = A->cmap->rstart;
5599     cmap  = a->garray;
5600     nzA   = a->A->cmap->n;
5601     nzB   = a->B->cmap->n;
5602     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5603     ncols = 0;
5604     for (i=0; i<nzB; i++) {  /* row < local row index */
5605       if (cmap[i] < start) idx[ncols++] = cmap[i];
5606       else break;
5607     }
5608     imark = i;
5609     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5610     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5611     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5612     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5613   } else {
5614     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5615     isrowb  = *rowb; iscolb = *colb;
5616     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5617     bseq[0] = *B_seq;
5618   }
5619   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5620   *B_seq = bseq[0];
5621   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5622   if (!rowb) {
5623     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5624   } else {
5625     *rowb = isrowb;
5626   }
5627   if (!colb) {
5628     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5629   } else {
5630     *colb = iscolb;
5631   }
5632   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5633   PetscFunctionReturn(0);
5634 }
5635 
5636 /*
5637     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5638     of the OFF-DIAGONAL portion of the local A
5639 
5640     Collective on Mat
5641 
5642    Input Parameters:
5643 +    A,B - the matrices in mpiaij format
5644 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5645 
5646    Output Parameters:
5647 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5648 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5649 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5650 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5651 
5652     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5653      for this matrix. This is not desirable.
5654 
5655     Level: developer
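
    A usage sketch (error checking omitted); the buffers returned for MAT_INITIAL_MATRIX
    must be kept by the caller and passed back for MAT_REUSE_MATRIX calls:

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);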
5656 
5657 */
5658 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5659 {
5660   PetscErrorCode         ierr;
5661   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5662   Mat_SeqAIJ             *b_oth;
5663   VecScatter             ctx;
5664   MPI_Comm               comm;
5665   const PetscMPIInt      *rprocs,*sprocs;
5666   const PetscInt         *srow,*rstarts,*sstarts;
5667   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5668   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5669   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5670   MPI_Request            *rwaits = NULL,*swaits = NULL;
5671   MPI_Status             rstatus;
5672   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5673 
5674   PetscFunctionBegin;
5675   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5676   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5677 
5678   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5679     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5680   }
5681   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5682   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5683 
5684   if (size == 1) {
5685     startsj_s = NULL;
5686     bufa_ptr  = NULL;
5687     *B_oth    = NULL;
5688     PetscFunctionReturn(0);
5689   }
5690 
5691   ctx = a->Mvctx;
5692   tag = ((PetscObject)ctx)->tag;
5693 
5694   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5695   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5696   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5697   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5698   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5699   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5700   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5701 
5702   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5703   if (scall == MAT_INITIAL_MATRIX) {
5704     /* i-array */
5705     /*---------*/
5706     /*  post receives */
5707     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5708     for (i=0; i<nrecvs; i++) {
5709       rowlen = rvalues + rstarts[i]*rbs;
5710       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5711       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5712     }
5713 
5714     /* pack the outgoing message */
5715     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5716 
5717     sstartsj[0] = 0;
5718     rstartsj[0] = 0;
5719     len         = 0; /* total length of j or a array to be sent */
5720     if (nsends) {
5721       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5722       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5723     }
5724     for (i=0; i<nsends; i++) {
5725       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5726       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5727       for (j=0; j<nrows; j++) {
5728         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5729         for (l=0; l<sbs; l++) {
5730           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5731 
5732           rowlen[j*sbs+l] = ncols;
5733 
5734           len += ncols;
5735           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5736         }
5737         k++;
5738       }
5739       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5740 
5741       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5742     }
5743     /* recvs and sends of i-array are completed */
5744     i = nrecvs;
5745     while (i--) {
5746       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5747     }
5748     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5749     ierr = PetscFree(svalues);CHKERRQ(ierr);
5750 
5751     /* allocate buffers for sending j and a arrays */
5752     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5753     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5754 
5755     /* create i-array of B_oth */
5756     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5757 
5758     b_othi[0] = 0;
5759     len       = 0; /* total length of j or a array to be received */
5760     k         = 0;
5761     for (i=0; i<nrecvs; i++) {
5762       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5763       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5764       for (j=0; j<nrows; j++) {
5765         b_othi[k+1] = b_othi[k] + rowlen[j];
5766         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5767         k++;
5768       }
5769       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5770     }
5771     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5772 
5773     /* allocate space for j and a arrays of B_oth */
5774     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5775     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5776 
5777     /* j-array */
5778     /*---------*/
5779     /*  post receives of j-array */
5780     for (i=0; i<nrecvs; i++) {
5781       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5782       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5783     }
5784 
5785     /* pack the outgoing message j-array */
5786     if (nsends) k = sstarts[0];
5787     for (i=0; i<nsends; i++) {
5788       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5789       bufJ  = bufj+sstartsj[i];
5790       for (j=0; j<nrows; j++) {
5791         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5792         for (ll=0; ll<sbs; ll++) {
5793           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5794           for (l=0; l<ncols; l++) {
5795             *bufJ++ = cols[l];
5796           }
5797           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5798         }
5799       }
5800       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5801     }
5802 
5803     /* recvs and sends of j-array are completed */
5804     i = nrecvs;
5805     while (i--) {
5806       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5807     }
5808     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5809   } else if (scall == MAT_REUSE_MATRIX) {
5810     sstartsj = *startsj_s;
5811     rstartsj = *startsj_r;
5812     bufa     = *bufa_ptr;
5813     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5814     b_otha   = b_oth->a;
5815   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
5816 
5817   /* a-array */
5818   /*---------*/
5819   /*  post receives of a-array */
5820   for (i=0; i<nrecvs; i++) {
5821     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5822     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5823   }
5824 
5825   /* pack the outgoing message a-array */
5826   if (nsends) k = sstarts[0];
5827   for (i=0; i<nsends; i++) {
5828     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5829     bufA  = bufa+sstartsj[i];
5830     for (j=0; j<nrows; j++) {
5831       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5832       for (ll=0; ll<sbs; ll++) {
5833         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5834         for (l=0; l<ncols; l++) {
5835           *bufA++ = vals[l];
5836         }
5837         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5838       }
5839     }
5840     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5841   }
5842   /* recvs and sends of a-array are completed */
5843   i = nrecvs;
5844   while (i--) {
5845     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5846   }
5847   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5848   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5849 
5850   if (scall == MAT_INITIAL_MATRIX) {
5851     /* put together the new matrix */
5852     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5853 
5854     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5855     /* Since these are PETSc arrays, change flags to free them as necessary. */
5856     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5857     b_oth->free_a  = PETSC_TRUE;
5858     b_oth->free_ij = PETSC_TRUE;
5859     b_oth->nonew   = 0;
5860 
5861     ierr = PetscFree(bufj);CHKERRQ(ierr);
5862     if (!startsj_s || !bufa_ptr) {
5863       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5864       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5865     } else {
5866       *startsj_s = sstartsj;
5867       *startsj_r = rstartsj;
5868       *bufa_ptr  = bufa;
5869     }
5870   }
5871 
5872   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5873   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5874   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5875   PetscFunctionReturn(0);
5876 }
5877 
5878 /*@C
5879   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5880 
5881   Not Collective
5882 
5883   Input Parameter:
5884 . A - The matrix in mpiaij format
5885 
5886   Output Parameters:
5887 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5888 . colmap - A map from global column index to local index into lvec
5889 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5890 
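
  A minimal sketch (error checking omitted; colmap is a PetscTable when PETSc is
  configured with ctable support and a PetscInt array otherwise):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve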
5891   Level: developer
5892 
5893 @*/
5894 #if defined(PETSC_USE_CTABLE)
5895 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5896 #else
5897 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5898 #endif
5899 {
5900   Mat_MPIAIJ *a;
5901 
5902   PetscFunctionBegin;
5903   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5904   PetscValidPointer(lvec, 2);
5905   PetscValidPointer(colmap, 3);
5906   PetscValidPointer(multScatter, 4);
5907   a = (Mat_MPIAIJ*) A->data;
5908   if (lvec) *lvec = a->lvec;
5909   if (colmap) *colmap = a->colmap;
5910   if (multScatter) *multScatter = a->Mvctx;
5911   PetscFunctionReturn(0);
5912 }
5913 
5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5917 #if defined(PETSC_HAVE_MKL_SPARSE)
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5919 #endif
5920 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5922 #if defined(PETSC_HAVE_ELEMENTAL)
5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5924 #endif
5925 #if defined(PETSC_HAVE_HYPRE)
5926 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5927 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5928 #endif
5929 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5931 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5932 
5933 /*
5934     Computes C = A*B as (B'*A')' since computing the product directly is untenable
5935 
5936                n                       p                          p
5937         (              )       (              )         (                  )
5938       m (      A       )  *  n (       B      )   =   m (         C        )
5939         (              )       (              )         (                  )
5940 
5941 */
5942 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5943 {
5944   PetscErrorCode ierr;
5945   Mat            At,Bt,Ct;
5946 
5947   PetscFunctionBegin;
5948   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5949   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5950   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5951   ierr = MatDestroy(&At);CHKERRQ(ierr);
5952   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5953   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5954   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5955   PetscFunctionReturn(0);
5956 }
5957 
5958 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5959 {
5960   PetscErrorCode ierr;
5961   PetscInt       m=A->rmap->n,n=B->cmap->n;
5962   Mat            Cmat;
5963 
5964   PetscFunctionBegin;
5965   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
5966   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5967   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5968   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5969   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5970   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5971   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5972   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5973 
5974   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5975 
5976   *C = Cmat;
5977   PetscFunctionReturn(0);
5978 }
5979 
5980 /* ----------------------------------------------------------------*/
5981 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5982 {
5983   PetscErrorCode ierr;
5984 
5985   PetscFunctionBegin;
5986   if (scall == MAT_INITIAL_MATRIX) {
5987     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5988     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5989     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5990   }
5991   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5992   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5993   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5994   PetscFunctionReturn(0);
5995 }
5996 
5997 /*MC
5998    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5999 
6000    Options Database Keys:
6001 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6002 
6003    Level: beginner
6004 
6005    Notes:
6006     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6007     in this case the values associated with the rows and columns one passes in are set to zero
6008     in the matrix
6009 
6010     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6011     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
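
    A sketch of a typical creation sequence (error checking omitted; the sizes and the
    preallocation counts d_nz and o_nz are illustrative):
.vb
    MatCreate(comm,&A);
    MatSetSizes(A,m,n,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve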
6012 
6013 .seealso: MatCreateAIJ()
6014 M*/
6015 
6016 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6017 {
6018   Mat_MPIAIJ     *b;
6019   PetscErrorCode ierr;
6020   PetscMPIInt    size;
6021 
6022   PetscFunctionBegin;
6023   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6024 
6025   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6026   B->data       = (void*)b;
6027   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6028   B->assembled  = PETSC_FALSE;
6029   B->insertmode = NOT_SET_VALUES;
6030   b->size       = size;
6031 
6032   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6033 
6034   /* build cache for off array entries formed */
6035   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6036 
6037   b->donotstash  = PETSC_FALSE;
6038   b->colmap      = 0;
6039   b->garray      = 0;
6040   b->roworiented = PETSC_TRUE;
6041 
6042   /* stuff used for matrix vector multiply */
6043   b->lvec  = NULL;
6044   b->Mvctx = NULL;
6045 
6046   /* stuff for MatGetRow() */
6047   b->rowindices   = 0;
6048   b->rowvalues    = 0;
6049   b->getrowactive = PETSC_FALSE;
6050 
6051   /* flexible pointer used in CUSP/CUSPARSE classes */
6052   b->spptr = NULL;
6053 
6054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6064 #if defined(PETSC_HAVE_MKL_SPARSE)
6065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6066 #endif
6067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6070 #if defined(PETSC_HAVE_ELEMENTAL)
6071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6072 #endif
6073 #if defined(PETSC_HAVE_HYPRE)
6074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6075 #endif
6076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6081 #if defined(PETSC_HAVE_HYPRE)
6082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6083 #endif
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6086   PetscFunctionReturn(0);
6087 }
6088 
6089 /*@C
6090      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6091          and "off-diagonal" part of the matrix in CSR format.
6092 
6093    Collective
6094 
6095    Input Parameters:
6096 +  comm - MPI communicator
6097 .  m - number of local rows (Cannot be PETSC_DECIDE)
6098 .  n - This value should be the same as the local size used in creating the
6099        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6100        calculated if N is given). For square matrices n is almost always m.
6101 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6102 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6103 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6104 .   j - column indices
6105 .   a - matrix values
6106 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6107 .   oj - column indices
6108 -   oa - matrix values
6109 
6110    Output Parameter:
6111 .   mat - the matrix
6112 
6113    Level: advanced
6114 
6115    Notes:
6116        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6117        must free the arrays once the matrix has been destroyed and not before.
6118 
6119        The i and j indices are 0 based
6120 
6121        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6122 
6123        This sets local rows and cannot be used to set off-processor values.
6124 
6125        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6126        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6127        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6128        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6129        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6130        communication if it is known that only local entries will be set.
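
       A sketch of the split arrays (illustrative values) for the rank owning global rows 0-1
       and columns 0-1 of a 4x4 matrix distributed over two processes, with one diagonal and
       one off-diagonal entry per row; j holds local column indices while oj holds global
       column indices, and all six arrays must remain valid until the matrix is destroyed:
.vb
       PetscInt    i[3]  = {0,1,2},  j[2]  = {0,1};
       PetscScalar a[2]  = {1.0,2.0};
       PetscInt    oi[3] = {0,1,2},  oj[2] = {2,3};
       PetscScalar oa[2] = {3.0,4.0};
       Mat         A;
       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
.ve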
6131 
6132 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6133           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6134 @*/
6135 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6136 {
6137   PetscErrorCode ierr;
6138   Mat_MPIAIJ     *maij;
6139 
6140   PetscFunctionBegin;
6141   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
6142   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6143   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6144   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6145   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6146   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6147   maij = (Mat_MPIAIJ*) (*mat)->data;
6148 
6149   (*mat)->preallocated = PETSC_TRUE;
6150 
6151   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6152   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6153 
6154   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6155   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6156 
6157   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6158   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6159   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6160   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6161 
6162   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6163   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6164   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6165   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6166   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6167   PetscFunctionReturn(0);
6168 }
6169 
6170 /*
6171     Special version for direct calls from Fortran
6172 */
6173 #include <petsc/private/fortranimpl.h>
6174 
6175 /* Change these macros so they can be used in a void function */
6176 #undef CHKERRQ
6177 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6178 #undef SETERRQ2
6179 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6180 #undef SETERRQ3
6181 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6182 #undef SETERRQ
6183 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6184 
6185 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6186 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6187 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6188 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6189 #else
6190 #endif
6191 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6192 {
6193   Mat            mat  = *mmat;
6194   PetscInt       m    = *mm, n = *mn;
6195   InsertMode     addv = *maddv;
6196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6197   PetscScalar    value;
6198   PetscErrorCode ierr;
6199 
6200   MatCheckPreallocated(mat,1);
6201   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6202 
6203 #if defined(PETSC_USE_DEBUG)
6204   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6205 #endif
6206   {
6207     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6208     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6209     PetscBool roworiented = aij->roworiented;
6210 
6211     /* Some Variables required in the macro */
6212     Mat        A                 = aij->A;
6213     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6214     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6215     MatScalar  *aa               = a->a;
6216     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6217     Mat        B                 = aij->B;
6218     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6219     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6220     MatScalar  *ba               = b->a;
6221 
6222     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6223     PetscInt  nonew = a->nonew;
6224     MatScalar *ap1,*ap2;
6225 
6226     PetscFunctionBegin;
6227     for (i=0; i<m; i++) {
6228       if (im[i] < 0) continue;
6229 #if defined(PETSC_USE_DEBUG)
6230       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6231 #endif
6232       if (im[i] >= rstart && im[i] < rend) {
6233         row      = im[i] - rstart;
6234         lastcol1 = -1;
6235         rp1      = aj + ai[row];
6236         ap1      = aa + ai[row];
6237         rmax1    = aimax[row];
6238         nrow1    = ailen[row];
6239         low1     = 0;
6240         high1    = nrow1;
6241         lastcol2 = -1;
6242         rp2      = bj + bi[row];
6243         ap2      = ba + bi[row];
6244         rmax2    = bimax[row];
6245         nrow2    = bilen[row];
6246         low2     = 0;
6247         high2    = nrow2;
6248 
6249         for (j=0; j<n; j++) {
6250           if (roworiented) value = v[i*n+j];
6251           else value = v[i+j*m];
6252           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6253           if (in[j] >= cstart && in[j] < cend) {
6254             col = in[j] - cstart;
6255             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6256           } else if (in[j] < 0) continue;
6257 #if defined(PETSC_USE_DEBUG)
6258           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6259           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6260 #endif
6261           else {
6262             if (mat->was_assembled) {
6263               if (!aij->colmap) {
6264                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6265               }
6266 #if defined(PETSC_USE_CTABLE)
6267               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6268               col--;
6269 #else
6270               col = aij->colmap[in[j]] - 1;
6271 #endif
6272               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6273                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6274                 col  =  in[j];
6275                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6276                 B     = aij->B;
6277                 b     = (Mat_SeqAIJ*)B->data;
6278                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6279                 rp2   = bj + bi[row];
6280                 ap2   = ba + bi[row];
6281                 rmax2 = bimax[row];
6282                 nrow2 = bilen[row];
6283                 low2  = 0;
6284                 high2 = nrow2;
6285                 bm    = aij->B->rmap->n;
6286                 ba    = b->a;
6287               }
6288             } else col = in[j];
6289             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6290           }
6291         }
6292       } else if (!aij->donotstash) {
6293         if (roworiented) {
6294           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6295         } else {
6296           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6297         }
6298       }
6299     }
6300   }
6301   PetscFunctionReturnVoid();
6302 }
6303