xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e7a4bfd48bf8e3b2845a4693534ad019dd407eea)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
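
/*
   A minimal usage sketch for MATAIJ (illustrative only; error checking is omitted and the
   sizes and fill estimates below are assumptions, not values taken from this file):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);          // takes effect on a single process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   // takes effect on multiple processes
*/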
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
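  /* Accumulate this rank's contribution to every global column: entries of the diagonal
     block map to column A->cmap->rstart + a_aij->j[i], entries of the off-diagonal block
     map through garray[]; the per-rank results are combined across ranks further below. */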
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all other processes */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
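      /* ld[i] counts the entries of row i that fall to the left of the diagonal block
         (global column < rstart); it is stashed in the Mat_MPIAIJ below so that a later
         MAT_REUSE_MATRIX call can split the incoming stream of values between the
         diagonal (A) and off-diagonal (B) blocks */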
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
      /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
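
/*
   Lookup sketch for the colmap created above (illustrative; gcol and lcol are hypothetical
   names). Values are stored as local index + 1 so that 0 can mean "not present":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) ...   // gcol has no entry in the local off-diagonal block
*/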
447 
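/* The two macros below insert one (row,col,value) pair into the diagonal (A) and
   off-diagonal (B) SeqAIJ blocks respectively. Each narrows the search window with a
   short binary search, updates an existing entry in place, and otherwise grows the row
   (reallocating via MatSeqXAIJReallocateAIJ() if needed) and shifts later entries up. */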
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
            /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
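/* MatSetValuesRow_MPIAIJ() expects v[] to hold the entire combined row in global column
   order: first the off-diagonal entries to the left of the diagonal block, then the
   diagonal block, then the off-diagonal entries to its right; see the three
   PetscArraycpy() calls below. */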
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
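
/*
   Typical call pattern for the routine above (a sketch, not code from this file; row,
   cols and vals are illustrative names). Off-process entries are stashed locally and
   only communicated during assembly:

     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/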
666 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840 
841         i = j;
842       }
843     }
844     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
845   }
846 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
847   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
848   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
849   if (mat->boundtocpu) {
850     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
851     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
852   }
853 #endif
854   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
855   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
856 
  /* Determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     If the nonzero structure of the submatrix B cannot change, then we know that
     no process disassembled, and thus we can skip this step.
  */
863   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
864     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
865     if (mat->was_assembled && !other_disassembled) {
866 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
867       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
868 #endif
869       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
870     }
871   }
872   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
873     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
874   }
875   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
876 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
877   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
878 #endif
879   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
880   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
881 
882   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
883 
884   aij->rowvalues = 0;
885 
886   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
887   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
888 
  /* if no new nonzero locations are allowed in the matrix then only set the matrix nonzero state the first time through */
890   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
891     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
892     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
893   }
894 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
895   mat->offloadmask = PETSC_OFFLOAD_BOTH;
896 #endif
897   PetscFunctionReturn(0);
898 }
899 
900 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
901 {
902   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
903   PetscErrorCode ierr;
904 
905   PetscFunctionBegin;
906   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
907   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
914   PetscObjectState sA, sB;
915   PetscInt        *lrows;
916   PetscInt         r, len;
917   PetscBool        cong, lch, gch;
918   PetscErrorCode   ierr;
919 
920   PetscFunctionBegin;
921   /* get locally owned rows */
922   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
923   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
924   /* fix right hand side if needed */
925   if (x && b) {
926     const PetscScalar *xx;
927     PetscScalar       *bb;
928 
929     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
930     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
931     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
932     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
933     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
934     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
935   }
936 
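  /* snapshot the nonzero states so that any pattern change made below can be detected
     and folded into A->nonzerostate at the end of this routine */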
937   sA = mat->A->nonzerostate;
938   sB = mat->B->nonzerostate;
939 
940   if (diag != 0.0 && cong) {
941     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
942     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
943   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
944     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
945     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
946     PetscInt   nnwA, nnwB;
947     PetscBool  nnzA, nnzB;
948 
949     nnwA = aijA->nonew;
950     nnwB = aijB->nonew;
951     nnzA = aijA->keepnonzeropattern;
952     nnzB = aijB->keepnonzeropattern;
953     if (!nnzA) {
954       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
955       aijA->nonew = 0;
956     }
957     if (!nnzB) {
958       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
959       aijB->nonew = 0;
960     }
961     /* Must zero here before the next loop */
962     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
964     for (r = 0; r < len; ++r) {
965       const PetscInt row = lrows[r] + A->rmap->rstart;
966       if (row >= A->cmap->N) continue;
967       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
968     }
969     aijA->nonew = nnwA;
970     aijB->nonew = nnwB;
971   } else {
972     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
974   }
975   ierr = PetscFree(lrows);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978 
979   /* reduce nonzerostate */
980   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
981   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
982   if (gch) A->nonzerostate++;
983   PetscFunctionReturn(0);
984 }
985 
986 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
987 {
988   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode    ierr;
990   PetscMPIInt       n = A->rmap->n;
991   PetscInt          i,j,r,m,len = 0;
992   PetscInt          *lrows,*owners = A->rmap->range;
993   PetscMPIInt       p = 0;
994   PetscSFNode       *rrows;
995   PetscSF           sf;
996   const PetscScalar *xx;
997   PetscScalar       *bb,*mask;
998   Vec               xmask,lmask;
999   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
1000   const PetscInt    *aj, *ii,*ridx;
1001   PetscScalar       *aa;
1002 
1003   PetscFunctionBegin;
1004   /* Create SF where leaves are input rows and roots are owned rows */
1005   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1006   for (r = 0; r < n; ++r) lrows[r] = -1;
1007   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1008   for (r = 0; r < N; ++r) {
1009     const PetscInt idx   = rows[r];
1010     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1011     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1012       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1013     }
1014     rrows[r].rank  = p;
1015     rrows[r].index = rows[r] - owners[p];
1016   }
1017   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1018   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1019   /* Collect flags for rows to be zeroed */
1020   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1022   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1023   /* Compress and put in row numbers */
1024   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1025   /* zero diagonal part of matrix */
1026   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1027   /* handle off diagonal part of matrix */
1028   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1029   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1030   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1031   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1032   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1033   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1036   if (x && b) { /* this code is buggy when the row and column layout don't match */
1037     PetscBool cong;
1038 
1039     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1040     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1041     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1044     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1045   }
1046   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1047   /* remove zeroed rows of off diagonal matrix */
1048   ii = aij->i;
1049   for (i=0; i<len; i++) {
1050     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1051   }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1053   if (aij->compressedrow.use) {
1054     m    = aij->compressedrow.nrows;
1055     ii   = aij->compressedrow.i;
1056     ridx = aij->compressedrow.rindex;
1057     for (i=0; i<m; i++) {
1058       n  = ii[i+1] - ii[i];
1059       aj = aij->j + ii[i];
1060       aa = aij->a + ii[i];
1061 
1062       for (j=0; j<n; j++) {
1063         if (PetscAbsScalar(mask[*aj])) {
1064           if (b) bb[*ridx] -= *aa*xx[*aj];
1065           *aa = 0.0;
1066         }
1067         aa++;
1068         aj++;
1069       }
1070       ridx++;
1071     }
1072   } else { /* do not use compressed row format */
1073     m = l->B->rmap->n;
1074     for (i=0; i<m; i++) {
1075       n  = ii[i+1] - ii[i];
1076       aj = aij->j + ii[i];
1077       aa = aij->a + ii[i];
1078       for (j=0; j<n; j++) {
1079         if (PetscAbsScalar(mask[*aj])) {
1080           if (b) bb[i] -= *aa*xx[*aj];
1081           *aa = 0.0;
1082         }
1083         aa++;
1084         aj++;
1085       }
1086     }
1087   }
1088   if (x && b) {
1089     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1090     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1091   }
1092   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1093   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1094   ierr = PetscFree(lrows);CHKERRQ(ierr);
1095 
1096   /* only change matrix nonzero state if pattern was allowed to be changed */
1097   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1098     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1099     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1100   }
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       nt;
1109   VecScatter     Mvctx = a->Mvctx;
1110 
1111   PetscFunctionBegin;
1112   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1113   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
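  /* Overlap communication with computation: start gathering the ghost entries of xx into
     a->lvec, apply the local diagonal block while the scatter is in flight, then finish
     the scatter and add the off-diagonal contribution: yy = A_d*xx + B_o*lvec (notation
     informal). */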
1114 
1115   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1123 {
1124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136   VecScatter     Mvctx = a->Mvctx;
1137 
1138   PetscFunctionBegin;
1139   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1140   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1142   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* do local part */
1156   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1157   /* add partial results together */
1158   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1164 {
1165   MPI_Comm       comm;
1166   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1167   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1168   IS             Me,Notme;
1169   PetscErrorCode ierr;
1170   PetscInt       M,N,first,last,*notme,i;
1171   PetscBool      lf;
1172   PetscMPIInt    size;
1173 
1174   PetscFunctionBegin;
1175   /* Easy test: symmetric diagonal block */
1176   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1177   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1178   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1179   if (!*f) PetscFunctionReturn(0);
1180   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1182   if (size == 1) PetscFunctionReturn(0);
1183 
1184   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1185   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1186   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1187   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1188   for (i=0; i<first; i++) notme[i] = i;
1189   for (i=last; i<M; i++) notme[i-last+first] = i;
1190   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1191   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1192   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1193   Aoff = Aoffs[0];
1194   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1195   Boff = Boffs[0];
1196   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1198   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1200   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1201   ierr = PetscFree(notme);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1206 {
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   /* do nondiagonal part */
1221   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1222   /* do local part */
1223   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1224   /* add partial results together */
1225   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 /*
1231   This only works correctly for square matrices where the subblock A->A is the
1232    diagonal block
1233 */
1234 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1235 {
1236   PetscErrorCode ierr;
1237   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1238 
1239   PetscFunctionBegin;
1240   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1241   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1242   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1247 {
1248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1253   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1258 {
1259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263 #if defined(PETSC_USE_LOG)
1264   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1265 #endif
1266   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1267   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1269   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1270 #if defined(PETSC_USE_CTABLE)
1271   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1272 #else
1273   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1274 #endif
1275   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1276   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1278   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1279   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1280   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1281   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1282 
1283   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1293 #if defined(PETSC_HAVE_ELEMENTAL)
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1295 #endif
1296 #if defined(PETSC_HAVE_HYPRE)
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1299 #endif
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1307 {
1308   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1309   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1310   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1311   const PetscInt    *garray = aij->garray;
1312   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1313   PetscInt          *rowlens;
1314   PetscInt          *colidxs;
1315   PetscScalar       *matvals;
1316   PetscErrorCode    ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1320 
1321   M  = mat->rmap->N;
1322   N  = mat->cmap->N;
1323   m  = mat->rmap->n;
1324   rs = mat->rmap->rstart;
1325   cs = mat->cmap->rstart;
1326   nz = A->nz + B->nz;
1327 
1328   /* write matrix header */
1329   header[0] = MAT_FILE_CLASSID;
1330   header[1] = M; header[2] = N; header[3] = nz;
1331   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1332   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1333 
1334   /* fill in and store row lengths  */
1335   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1336   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1337   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1338   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1339 
1340   /* fill in and store column indices */
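  /* For each row the global column indices are emitted in ascending order: first the
     off-diagonal (B) entries whose global column lies below the owned column range,
     then the diagonal (A) entries shifted by cs, then the remaining B entries.
     For example, with cs = 10 and garray = {2,15}, a row with B entries in local
     columns {0,1} and an A entry in local column 3 is written as 2, 13, 15. */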
1341   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1342   for (cnt=0, i=0; i<m; i++) {
1343     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1344       if (garray[B->j[jb]] > cs) break;
1345       colidxs[cnt++] = garray[B->j[jb]];
1346     }
1347     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1348       colidxs[cnt++] = A->j[ja] + cs;
1349     for (; jb<B->i[i+1]; jb++)
1350       colidxs[cnt++] = garray[B->j[jb]];
1351   }
1352   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1353   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1354   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1355 
1356   /* fill in and store nonzero values */
1357   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1358   for (cnt=0, i=0; i<m; i++) {
1359     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1360       if (garray[B->j[jb]] > cs) break;
1361       matvals[cnt++] = B->a[jb];
1362     }
1363     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1364       matvals[cnt++] = A->a[ja];
1365     for (; jb<B->i[i+1]; jb++)
1366       matvals[cnt++] = B->a[jb];
1367   }
1368   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1369   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1370   ierr = PetscFree(matvals);CHKERRQ(ierr);
1371 
1372   /* write block size option to the viewer's .info file */
1373   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #include <petscdraw.h>
1378 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1379 {
1380   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1381   PetscErrorCode    ierr;
1382   PetscMPIInt       rank = aij->rank,size = aij->size;
1383   PetscBool         isdraw,iascii,isbinary;
1384   PetscViewer       sviewer;
1385   PetscViewerFormat format;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1391   if (iascii) {
1392     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1393     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1394       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1395       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1396       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397       for (i=0; i<(PetscInt)size; i++) {
1398         nmax = PetscMax(nmax,nz[i]);
1399         nmin = PetscMin(nmin,nz[i]);
1400         navg += nz[i];
1401       }
1402       ierr = PetscFree(nz);CHKERRQ(ierr);
1403       navg = navg/size;
1404       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1405       PetscFunctionReturn(0);
1406     }
1407     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1408     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1409       MatInfo   info;
1410       PetscBool inodes;
1411 
1412       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1413       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1414       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1415       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1416       if (!inodes) {
1417         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1418                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1419       } else {
1420         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1421                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1422       }
1423       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1424       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1425       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1426       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1427       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1429       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1430       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1431       PetscFunctionReturn(0);
1432     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1433       PetscInt inodecount,inodelimit,*inodes;
1434       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1435       if (inodes) {
1436         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1437       } else {
1438         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1439       }
1440       PetscFunctionReturn(0);
1441     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1442       PetscFunctionReturn(0);
1443     }
1444   } else if (isbinary) {
1445     if (size == 1) {
1446       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1447       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1448     } else {
1449       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1450     }
1451     PetscFunctionReturn(0);
1452   } else if (iascii && size == 1) {
1453     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1454     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1455     PetscFunctionReturn(0);
1456   } else if (isdraw) {
1457     PetscDraw draw;
1458     PetscBool isnull;
1459     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1460     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1461     if (isnull) PetscFunctionReturn(0);
1462   }
1463 
1464   { /* assemble the entire matrix onto first processor */
1465     Mat A = NULL, Av;
1466     IS  isrow,iscol;
1467 
1468     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1469     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1470     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1471     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1472 /*  The commented code uses MatCreateSubMatrices instead */
1473 /*
1474     Mat *AA, A = NULL, Av;
1475     IS  isrow,iscol;
1476 
1477     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1478     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1479     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1480     if (!rank) {
1481        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1482        A    = AA[0];
1483        Av   = AA[0];
1484     }
1485     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1486 */
1487     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1488     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1489     /*
1490        Everyone has to participate in drawing the matrix since the graphics waits are
1491        synchronized across all processes that share the PetscDraw object
1492     */
1493     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1494     if (!rank) {
1495       if (((PetscObject)mat)->name) {
1496         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1497       }
1498       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1499     }
1500     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1501     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1502     ierr = MatDestroy(&A);CHKERRQ(ierr);
1503   }
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1508 {
1509   PetscErrorCode ierr;
1510   PetscBool      iascii,isdraw,issocket,isbinary;
1511 
1512   PetscFunctionBegin;
1513   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1514   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1515   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1516   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1517   if (iascii || isdraw || isbinary || issocket) {
1518     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1519   }
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1524 {
1525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1526   PetscErrorCode ierr;
1527   Vec            bb1 = 0;
1528   PetscBool      hasop;
1529 
1530   PetscFunctionBegin;
1531   if (flag == SOR_APPLY_UPPER) {
1532     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1533     PetscFunctionReturn(0);
1534   }
1535 
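  /* A work vector bb1 is needed whenever more than one iteration is performed, the
     initial guess is nonzero (~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when
     the SOR_ZERO_INITIAL_GUESS bit is not set), or the Eisenstat variant is used */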
1536   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1537     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1538   }
1539 
1540   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1541     if (flag & SOR_ZERO_INITIAL_GUESS) {
1542       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1543       its--;
1544     }
1545 
1546     while (its--) {
1547       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549 
1550       /* update rhs: bb1 = bb - B*x */
1551       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1552       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1553 
1554       /* local sweep */
1555       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1556     }
1557   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1558     if (flag & SOR_ZERO_INITIAL_GUESS) {
1559       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1560       its--;
1561     }
1562     while (its--) {
1563       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1565 
1566       /* update rhs: bb1 = bb - B*x */
1567       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1568       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1569 
1570       /* local sweep */
1571       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1572     }
1573   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1574     if (flag & SOR_ZERO_INITIAL_GUESS) {
1575       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1576       its--;
1577     }
1578     while (its--) {
1579       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1581 
1582       /* update rhs: bb1 = bb - B*x */
1583       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1584       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1585 
1586       /* local sweep */
1587       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1588     }
1589   } else if (flag & SOR_EISENSTAT) {
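    /* Eisenstat's trick (sketch): by reusing the triangular sweeps, an SSOR-preconditioned
       iteration is obtained at roughly the cost of a single sweep; bb1 accumulates the
       modified right-hand side ((omega-2)/omega)*D*xx + bb plus the off-diagonal term
       B*lvec before the final local forward sweep */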
1590     Vec xx1;
1591 
1592     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1593     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1594 
1595     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597     if (!mat->diag) {
1598       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1599       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1600     }
1601     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1602     if (hasop) {
1603       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1604     } else {
1605       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1606     }
1607     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1608 
1609     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1610 
1611     /* local sweep */
1612     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1613     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1614     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1615   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1616 
1617   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1618 
1619   matin->factorerrortype = mat->A->factorerrortype;
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1624 {
1625   Mat            aA,aB,Aperm;
1626   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1627   PetscScalar    *aa,*ba;
1628   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1629   PetscSF        rowsf,sf;
1630   IS             parcolp = NULL;
1631   PetscBool      done;
1632   PetscErrorCode ierr;
1633 
1634   PetscFunctionBegin;
1635   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1636   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1637   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1638   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1639 
1640   /* Invert row permutation to find out where my rows should go */
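  /* rwant[i] is the global row whose contents should appear in local row i after the
     permutation. Reducing the identity (work[i] = rstart+i) over an SF whose leaves
     point at rwant deposits at each original row the new global index it must move
     to, which is returned in rdest. */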
1641   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1642   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1643   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1644   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1645   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1646   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1647 
1648   /* Invert column permutation to find out where my columns should go */
1649   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1650   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1651   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1652   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1653   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1654   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1656 
1657   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1658   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1659   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1660 
1661   /* Find out where my gcols should go */
1662   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1663   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1664   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1665   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1666   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1667   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1669   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1670 
1671   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1672   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1673   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1674   for (i=0; i<m; i++) {
1675     PetscInt    row = rdest[i];
1676     PetscMPIInt rowner;
1677     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1678     for (j=ai[i]; j<ai[i+1]; j++) {
1679       PetscInt    col = cdest[aj[j]];
1680       PetscMPIInt cowner;
1681       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1682       if (rowner == cowner) dnnz[i]++;
1683       else onnz[i]++;
1684     }
1685     for (j=bi[i]; j<bi[i+1]; j++) {
1686       PetscInt    col = gcdest[bj[j]];
1687       PetscMPIInt cowner;
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692   }
1693   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1694   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1695   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1696   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1697   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1698 
1699   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1700   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1701   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1702   for (i=0; i<m; i++) {
1703     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1704     PetscInt j0,rowlen;
1705     rowlen = ai[i+1] - ai[i];
1706     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert in batches of at most m */
1707       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1708       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1709     }
1710     rowlen = bi[i+1] - bi[i];
1711     for (j0=j=0; j<rowlen; j0=j) {
1712       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1713       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1714     }
1715   }
1716   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1717   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1719   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1720   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1721   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1722   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1723   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1724   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1725   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1726   *B = Aperm;
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1731 {
1732   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1733   PetscErrorCode ierr;
1734 
1735   PetscFunctionBegin;
1736   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1737   if (ghosts) *ghosts = aij->garray;
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   Mat            A    = mat->A,B = mat->B;
1745   PetscErrorCode ierr;
1746   PetscLogDouble isend[5],irecv[5];
1747 
1748   PetscFunctionBegin;
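  /* Accumulate the five additive statistics of the diagonal and off-diagonal blocks
     in isend[], then report them directly (MAT_LOCAL) or reduce them across the
     communicator with MPI_MAX or MPI_SUM. */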
1749   info->block_size = 1.0;
1750   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1751 
1752   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1753   isend[3] = info->memory;  isend[4] = info->mallocs;
1754 
1755   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1756 
1757   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1758   isend[3] += info->memory;  isend[4] += info->mallocs;
1759   if (flag == MAT_LOCAL) {
1760     info->nz_used      = isend[0];
1761     info->nz_allocated = isend[1];
1762     info->nz_unneeded  = isend[2];
1763     info->memory       = isend[3];
1764     info->mallocs      = isend[4];
1765   } else if (flag == MAT_GLOBAL_MAX) {
1766     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1767 
1768     info->nz_used      = irecv[0];
1769     info->nz_allocated = irecv[1];
1770     info->nz_unneeded  = irecv[2];
1771     info->memory       = irecv[3];
1772     info->mallocs      = irecv[4];
1773   } else if (flag == MAT_GLOBAL_SUM) {
1774     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1775 
1776     info->nz_used      = irecv[0];
1777     info->nz_allocated = irecv[1];
1778     info->nz_unneeded  = irecv[2];
1779     info->memory       = irecv[3];
1780     info->mallocs      = irecv[4];
1781   }
1782   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1783   info->fill_ratio_needed = 0;
1784   info->factor_mallocs    = 0;
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1789 {
1790   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1791   PetscErrorCode ierr;
1792 
1793   PetscFunctionBegin;
1794   switch (op) {
1795   case MAT_NEW_NONZERO_LOCATIONS:
1796   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1797   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1798   case MAT_KEEP_NONZERO_PATTERN:
1799   case MAT_NEW_NONZERO_LOCATION_ERR:
1800   case MAT_IGNORE_ZERO_ENTRIES:
1801     MatCheckPreallocated(A,1);
1802     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1803     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1804     break;
1805   case MAT_USE_INODES:
1806     if (PetscUnlikely(!(A)->preallocated)) {
1807       a->inode_setoption = PETSC_TRUE; /* option will be set in MatMPIAIJSetPreallocation_MPIAIJ() */
1808       a->inode_use       = flg;
1809     } else {
1810       a->inode_setoption = PETSC_FALSE;
1811       ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812       ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1813     }
1814     break;
1815   case MAT_ROW_ORIENTED:
1816     MatCheckPreallocated(A,1);
1817     a->roworiented = flg;
1818 
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_NEW_DIAGONALS:
1823   case MAT_SORTED_FULL:
1824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1825     break;
1826   case MAT_IGNORE_OFF_PROC_ENTRIES:
1827     a->donotstash = flg;
1828     break;
1829   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1830   case MAT_SPD:
1831   case MAT_SYMMETRIC:
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833   case MAT_HERMITIAN:
1834   case MAT_SYMMETRY_ETERNAL:
1835     break;
1836   case MAT_SUBMAT_SINGLEIS:
1837     A->submat_singleis = flg;
1838     break;
1839   case MAT_STRUCTURE_ONLY:
1840     /* The option is handled directly by MatSetOption() */
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1849 {
1850   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1851   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1852   PetscErrorCode ierr;
1853   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1854   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1855   PetscInt       *cmap,*idx_p;
1856 
1857   PetscFunctionBegin;
1858   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1859   mat->getrowactive = PETSC_TRUE;
1860 
1861   if (!mat->rowvalues && (idx || v)) {
1862     /*
1863         allocate enough space to hold information from the longest row.
1864     */
1865     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1866     PetscInt   max = 1,tmp;
1867     for (i=0; i<matin->rmap->n; i++) {
1868       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1869       if (max < tmp) max = tmp;
1870     }
1871     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1872   }
1873 
1874   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1875   lrow = row - rstart;
1876 
1877   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1878   if (!v)   {pvA = 0; pvB = 0;}
1879   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1880   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1881   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1882   nztot = nzA + nzB;
1883 
1884   cmap = mat->garray;
1885   if (v  || idx) {
1886     if (nztot) {
1887       /* Sort by increasing column numbers, assuming A and B already sorted */
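      /* The merged order is: B entries with global column < cstart, then all A entries
         shifted by cstart, then the remaining B entries; imark records where the B
         part is split. */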
1888       PetscInt imark = -1;
1889       if (v) {
1890         *v = v_p = mat->rowvalues;
1891         for (i=0; i<nzB; i++) {
1892           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1893           else break;
1894         }
1895         imark = i;
1896         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1897         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1898       }
1899       if (idx) {
1900         *idx = idx_p = mat->rowindices;
1901         if (imark > -1) {
1902           for (i=0; i<imark; i++) {
1903             idx_p[i] = cmap[cworkB[i]];
1904           }
1905         } else {
1906           for (i=0; i<nzB; i++) {
1907             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1908             else break;
1909           }
1910           imark = i;
1911         }
1912         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1913         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1914       }
1915     } else {
1916       if (idx) *idx = 0;
1917       if (v)   *v   = 0;
1918     }
1919   }
1920   *nz  = nztot;
1921   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1922   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1927 {
1928   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1929 
1930   PetscFunctionBegin;
1931   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1932   aij->getrowactive = PETSC_FALSE;
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1937 {
1938   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1939   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,cstart = mat->cmap->rstart;
1942   PetscReal      sum = 0.0;
1943   MatScalar      *v;
1944 
1945   PetscFunctionBegin;
1946   if (aij->size == 1) {
1947     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1948   } else {
1949     if (type == NORM_FROBENIUS) {
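      /* ||A||_F = sqrt(sum_ij |a_ij|^2): accumulate the squared moduli of both local
         blocks, sum across processes, and take the square root */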
1950       v = amat->a;
1951       for (i=0; i<amat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       v = bmat->a;
1955       for (i=0; i<bmat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1961     } else if (type == NORM_1) { /* max column norm */
1962       PetscReal *tmp,*tmp2;
1963       PetscInt  *jj,*garray = aij->garray;
1964       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1965       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v     = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1982     } else if (type == NORM_INFINITY) { /* max row norm */
1983       PetscReal ntemp = 0.0;
1984       for (j=0; j<aij->A->rmap->n; j++) {
1985         v   = amat->a + amat->i[j];
1986         sum = 0.0;
1987         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         v = bmat->a + bmat->i[j];
1991         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         if (sum > ntemp) ntemp = sum;
1995       }
1996       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1997       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1998     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2004 {
2005   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2006   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2007   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2008   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2009   PetscErrorCode  ierr;
2010   Mat             B,A_diag,*B_diag;
2011   const MatScalar *array;
2012 
2013   PetscFunctionBegin;
2014   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2015   ai = Aloc->i; aj = Aloc->j;
2016   bi = Bloc->i; bj = Bloc->j;
2017   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2018     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2019     PetscSFNode          *oloc;
2020     PETSC_UNUSED PetscSF sf;
2021 
2022     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2023     /* compute d_nnz for preallocation */
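    /* Row j of the transpose receives one entry for every occurrence of column j in
       the diagonal block, so counting the column indices in aj yields the d_nnz row
       lengths directly (e.g. aj = {0,2,0} gives d_nnz = {2,0,1}) */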
2024     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2025     for (i=0; i<ai[ma]; i++) {
2026       d_nnz[aj[i]]++;
2027     }
2028     /* compute local off-diagonal contributions */
2029     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2030     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2031     /* map those to global */
2032     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2033     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2034     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2035     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2036     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2037     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2038     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2039 
2040     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2041     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2042     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2043     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2044     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2045     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2046   } else {
2047     B    = *matout;
2048     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2049   }
2050 
2051   b           = (Mat_MPIAIJ*)B->data;
2052   A_diag      = a->A;
2053   B_diag      = &b->A;
2054   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2055   A_diag_ncol = A_diag->cmap->N;
2056   B_diag_ilen = sub_B_diag->ilen;
2057   B_diag_i    = sub_B_diag->i;
2058 
2059   /* Set ilen for diagonal of B */
2060   for (i=0; i<A_diag_ncol; i++) {
2061     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2062   }
2063 
2064   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2065   very quickly (i.e., without using MatSetValues()), because all writes are local. */
2066   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2067 
2068   /* copy over the B part */
2069   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2070   array = Bloc->a;
2071   row   = A->rmap->rstart;
2072   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2073   cols_tmp = cols;
2074   for (i=0; i<mb; i++) {
2075     ncol = bi[i+1]-bi[i];
2076     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2077     row++;
2078     array += ncol; cols_tmp += ncol;
2079   }
2080   ierr = PetscFree(cols);CHKERRQ(ierr);
2081 
2082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2085     *matout = B;
2086   } else {
2087     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2088   }
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2093 {
2094   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2095   Mat            a    = aij->A,b = aij->B;
2096   PetscErrorCode ierr;
2097   PetscInt       s1,s2,s3;
2098 
2099   PetscFunctionBegin;
2100   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2101   if (rr) {
2102     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2103     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2104     /* Overlap communication with computation. */
2105     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2106   }
2107   if (ll) {
2108     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2109     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2110     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2111   }
2112   /* scale the diagonal block */
2113   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2114 
2115   if (rr) {
2116     /* Do a scatter end and then right scale the off-diagonal block */
2117     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2124 {
2125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2134 {
2135   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2136   Mat            a,b,c,d;
2137   PetscBool      flg;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   a = matA->A; b = matA->B;
2142   c = matB->A; d = matB->B;
2143 
2144   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2145   if (flg) {
2146     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2147   }
2148   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2153 {
2154   PetscErrorCode ierr;
2155   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2156   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2157 
2158   PetscFunctionBegin;
2159   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2160   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2161     /* Because of the column compression in the off-process part of the matrix a->B,
2162        the number of columns in a->B and b->B may differ, hence we cannot call
2163        MatCopy() directly on the two parts. If need be, a copy more efficient than
2164        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2165        and then copying the submatrices */
2166     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2167   } else {
2168     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2169     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2170   }
2171   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2176 {
2177   PetscErrorCode ierr;
2178 
2179   PetscFunctionBegin;
2180   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 /*
2185    Computes the number of nonzeros per row needed for preallocation when X and Y
2186    have different nonzero structure.
2187 */
2188 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2189 {
2190   PetscInt       i,j,k,nzx,nzy;
2191 
2192   PetscFunctionBegin;
2193   /* Set the number of nonzeros in the new matrix */
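  /* Each row length is the size of the union of the sorted global column sets of X
     and Y: a two-pointer merge that counts every column once and skips duplicates.
     For example, rows with global columns {1,4,7} and {4,9} give nnz[i] = 4. */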
2194   for (i=0; i<m; i++) {
2195     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2196     nzx = xi[i+1] - xi[i];
2197     nzy = yi[i+1] - yi[i];
2198     nnz[i] = 0;
2199     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2200       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2201       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2202       nnz[i]++;
2203     }
2204     for (; k<nzy; k++) nnz[i]++;
2205   }
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2211 {
2212   PetscErrorCode ierr;
2213   PetscInt       m = Y->rmap->N;
2214   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2215   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2223 {
2224   PetscErrorCode ierr;
2225   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2226   PetscBLASInt   bnz,one=1;
2227   Mat_SeqAIJ     *x,*y;
2228 
2229   PetscFunctionBegin;
2230   if (str == SAME_NONZERO_PATTERN) {
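    /* With identical nonzero patterns, X and Y store their values in arrays of the
       same length and ordering, so Y += a*X reduces to one BLAS axpy per block */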
2231     PetscScalar alpha = a;
2232     x    = (Mat_SeqAIJ*)xx->A->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     y    = (Mat_SeqAIJ*)yy->A->data;
2235     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2236     x    = (Mat_SeqAIJ*)xx->B->data;
2237     y    = (Mat_SeqAIJ*)yy->B->data;
2238     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2239     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2240     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2241     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin/End(), so along those paths the matrix
2242        on the GPU is updated; here the CPU copy must be marked as the valid one explicitly */
2243 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2244     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2245       Y->offloadmask = PETSC_OFFLOAD_CPU;
2246     }
2247 #endif
2248   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2249     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2250   } else {
2251     Mat      B;
2252     PetscInt *nnz_d,*nnz_o;
2253     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2254     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2255     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2256     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2257     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2258     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2259     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2260     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2261     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2262     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2263     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2264     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2265     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2266     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2267   }
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2272 
2273 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2274 {
2275 #if defined(PETSC_USE_COMPLEX)
2276   PetscErrorCode ierr;
2277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2281   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2282 #else
2283   PetscFunctionBegin;
2284 #endif
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2295   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2306   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2311 {
2312   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2313   PetscErrorCode ierr;
2314   PetscInt       i,*idxb = 0;
2315   PetscScalar    *va,*vb;
2316   Vec            vtmp;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2320   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2321   if (idx) {
2322     for (i=0; i<A->rmap->n; i++) {
2323       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2324     }
2325   }
2326 
2327   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2328   if (idx) {
2329     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2330   }
2331   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2332   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2333 
2334   for (i=0; i<A->rmap->n; i++) {
2335     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2336       va[i] = vb[i];
2337       if (idx) idx[i] = a->garray[idxb[i]];
2338     }
2339   }
2340 
2341   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2342   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2343   ierr = PetscFree(idxb);CHKERRQ(ierr);
2344   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++) {
2373     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   ierr = PetscFree(idxb);CHKERRQ(ierr);
2382   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2389   PetscInt       n      = A->rmap->n;
2390   PetscInt       cstart = A->cmap->rstart;
2391   PetscInt       *cmap  = mat->garray;
2392   PetscInt       *diagIdx, *offdiagIdx;
2393   Vec            diagV, offdiagV;
2394   PetscScalar    *a, *diagA, *offdiagA;
2395   PetscInt       r;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2402   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2403   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   for (r = 0; r < n; ++r) {
2408     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       idx[r] = cstart + diagIdx[r];
2411     } else {
2412       a[r]   = offdiagA[r];
2413       idx[r] = cmap[offdiagIdx[r]];
2414     }
2415   }
2416   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2426 {
2427   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2428   PetscInt       n      = A->rmap->n;
2429   PetscInt       cstart = A->cmap->rstart;
2430   PetscInt       *cmap  = mat->garray;
2431   PetscInt       *diagIdx, *offdiagIdx;
2432   Vec            diagV, offdiagV;
2433   PetscScalar    *a, *diagA, *offdiagA;
2434   PetscInt       r;
2435   PetscErrorCode ierr;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2440   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2441   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2442   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2443   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2444   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2445   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   for (r = 0; r < n; ++r) {
2447     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2448       a[r]   = diagA[r];
2449       idx[r] = cstart + diagIdx[r];
2450     } else {
2451       a[r]   = offdiagA[r];
2452       idx[r] = cmap[offdiagIdx[r]];
2453     }
2454   }
2455   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2456   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2457   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2458   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2459   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2460   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2465 {
2466   PetscErrorCode ierr;
2467   Mat            *dummy;
2468 
2469   PetscFunctionBegin;
2470   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2471   *newmat = *dummy;
2472   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2477 {
2478   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2479   PetscErrorCode ierr;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2483   A->factorerrortype = a->A->factorerrortype;
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2488 {
2489   PetscErrorCode ierr;
2490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2491 
2492   PetscFunctionBegin;
2493   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2494   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2495   if (x->assembled) {
2496     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2497   } else {
2498     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2499   }
2500   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2501   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2506 {
2507   PetscFunctionBegin;
2508   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2509   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 /*@
2514    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2515 
2516    Collective on Mat
2517 
2518    Input Parameters:
2519 +    A - the matrix
2520 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2521 
2522  Level: advanced
2523 
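   Example Usage:
   A minimal sketch, assuming A is an assembled MATMPIAIJ matrix and nis/is are as in MatIncreaseOverlap():
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,is,1);CHKERRQ(ierr);  /* now uses the scalable algorithm */
.ve
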
2524 @*/
2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2526 {
2527   PetscErrorCode       ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2535 {
2536   PetscErrorCode       ierr;
2537   PetscBool            sc = PETSC_FALSE,flg;
2538 
2539   PetscFunctionBegin;
2540   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2541   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2542   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2543   if (flg) {
2544     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2545   }
2546   ierr = PetscOptionsTail();CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2551 {
2552   PetscErrorCode ierr;
2553   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2554   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2555 
2556   PetscFunctionBegin;
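  /* MatShift_Basic() only touches the local diagonal, so when Y is not yet
     preallocated (or its diagonal block is empty) reserving a single entry per row
     is sufficient */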
2557   if (!Y->preallocated) {
2558     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2559   } else if (!aij->nz) {
2560     PetscInt nonew = aij->nonew;
2561     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2562     aij->nonew = nonew;
2563   }
2564   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2569 {
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2575   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2576   if (d) {
2577     PetscInt rstart;
2578     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2579     *d += rstart;
2580 
2581   }
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2586 {
2587   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 /* -------------------------------------------------------------------*/
2596 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2597                                        MatGetRow_MPIAIJ,
2598                                        MatRestoreRow_MPIAIJ,
2599                                        MatMult_MPIAIJ,
2600                                 /* 4*/ MatMultAdd_MPIAIJ,
2601                                        MatMultTranspose_MPIAIJ,
2602                                        MatMultTransposeAdd_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                 /*10*/ 0,
2607                                        0,
2608                                        0,
2609                                        MatSOR_MPIAIJ,
2610                                        MatTranspose_MPIAIJ,
2611                                 /*15*/ MatGetInfo_MPIAIJ,
2612                                        MatEqual_MPIAIJ,
2613                                        MatGetDiagonal_MPIAIJ,
2614                                        MatDiagonalScale_MPIAIJ,
2615                                        MatNorm_MPIAIJ,
2616                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2617                                        MatAssemblyEnd_MPIAIJ,
2618                                        MatSetOption_MPIAIJ,
2619                                        MatZeroEntries_MPIAIJ,
2620                                 /*24*/ MatZeroRows_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*29*/ MatSetUp_MPIAIJ,
2626                                        0,
2627                                        0,
2628                                        MatGetDiagonalBlock_MPIAIJ,
2629                                        0,
2630                                 /*34*/ MatDuplicate_MPIAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*39*/ MatAXPY_MPIAIJ,
2636                                        MatCreateSubMatrices_MPIAIJ,
2637                                        MatIncreaseOverlap_MPIAIJ,
2638                                        MatGetValues_MPIAIJ,
2639                                        MatCopy_MPIAIJ,
2640                                 /*44*/ MatGetRowMax_MPIAIJ,
2641                                        MatScale_MPIAIJ,
2642                                        MatShift_MPIAIJ,
2643                                        MatDiagonalSet_MPIAIJ,
2644                                        MatZeroRowsColumns_MPIAIJ,
2645                                 /*49*/ MatSetRandom_MPIAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2651                                        0,
2652                                        MatSetUnfactored_MPIAIJ,
2653                                        MatPermute_MPIAIJ,
2654                                        0,
2655                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2656                                        MatDestroy_MPIAIJ,
2657                                        MatView_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                 /*64*/ 0,
2661                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2666                                        MatGetRowMinAbs_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*75*/ MatFDColoringApply_AIJ,
2672                                        MatSetFromOptions_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        MatFindZeroDiagonals_MPIAIJ,
2676                                 /*80*/ 0,
2677                                        0,
2678                                        0,
2679                                 /*83*/ MatLoad_MPIAIJ,
2680                                        MatIsSymmetric_MPIAIJ,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                 /*89*/ 0,
2686                                        0,
2687                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2688                                        0,
2689                                        0,
2690                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        MatBindToCPU_MPIAIJ,
2695                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                        MatConjugate_MPIAIJ,
2699                                        0,
2700                                 /*104*/MatSetValuesRow_MPIAIJ,
2701                                        MatRealPart_MPIAIJ,
2702                                        MatImaginaryPart_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                 /*109*/0,
2706                                        0,
2707                                        MatGetRowMin_MPIAIJ,
2708                                        0,
2709                                        MatMissingDiagonal_MPIAIJ,
2710                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2711                                        0,
2712                                        MatGetGhosts_MPIAIJ,
2713                                        0,
2714                                        0,
2715                                 /*119*/0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        MatGetMultiProcBlock_MPIAIJ,
2720                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2721                                        MatGetColumnNorms_MPIAIJ,
2722                                        MatInvertBlockDiagonal_MPIAIJ,
2723                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2724                                        MatCreateSubMatricesMPI_MPIAIJ,
2725                                 /*129*/0,
2726                                        0,
2727                                        0,
2728                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2729                                        0,
2730                                 /*134*/0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                 /*139*/MatSetBlockSizes_MPIAIJ,
2736                                        0,
2737                                        0,
2738                                        MatFDColoringSetUp_MPIXAIJ,
2739                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2740                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2741                                 /*145*/0,
2742                                        0,
2743                                        0
2744 };
2745 
2746 /* ----------------------------------------------------------------------------------------*/
2747 
2748 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2749 {
2750   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2755   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2760 {
2761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2762   PetscErrorCode ierr;
2763 
2764   PetscFunctionBegin;
2765   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2766   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2767   PetscFunctionReturn(0);
2768 }
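
/*
   Example: MatStoreValues()/MatRetrieveValues(), backed by the two wrappers
   above, snapshot and restore the numerical values of both the diagonal (A)
   and off-diagonal (B) blocks. A minimal sketch (assumes mat is an assembled
   MATMPIAIJ whose nonzero pattern is fixed):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... overwrite entries with MatSetValues() and MatAssemblyBegin/End() ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/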
2769 
2770 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2771 {
2772   Mat_MPIAIJ     *b;
2773   PetscErrorCode ierr;
2774   PetscMPIInt    size;
2775 
2776   PetscFunctionBegin;
2777   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2778   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2779   b = (Mat_MPIAIJ*)B->data;
2780 
2781 #if defined(PETSC_USE_CTABLE)
2782   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2783 #else
2784   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2785 #endif
2786   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2787   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2788   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2789 
2790   /* Because the off-diagonal matrix B may have been resized we simply destroy it and create a new one each time */
2791   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2792   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2793   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2794   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2795   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2796   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2797   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2798 
2799   if (!B->preallocated) {
2800     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2801     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2802     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2803     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2804     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2805   }
2806 
2807   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2808   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2809   B->preallocated  = PETSC_TRUE;
2810   B->was_assembled = PETSC_FALSE;
2811   B->assembled     = PETSC_FALSE;
2812 
2813   /* Set inode option */
2814   if (b->inode_setoption) {
2815     ierr = MatSetOption(b->A,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr);
2816     ierr = MatSetOption(b->B,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr);
2817   }
2818   PetscFunctionReturn(0);
2819 }
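
/*
   Example: a typical preallocation sequence that lands in the routine above
   (a minimal sketch; m is the number of locally owned rows and columns, and
   the per-row estimates 5 and 2 are illustrative only):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Pass d_nnz/o_nnz arrays instead of the scalar d_nz/o_nz when exact per-row
   counts for the diagonal and off-diagonal blocks are known.
*/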
2820 
2821 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2822 {
2823   Mat_MPIAIJ     *b;
2824   PetscErrorCode ierr;
2825 
2826   PetscFunctionBegin;
2827   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2828   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2830   b = (Mat_MPIAIJ*)B->data;
2831 
2832 #if defined(PETSC_USE_CTABLE)
2833   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2834 #else
2835   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2836 #endif
2837   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2838   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2839   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2840 
2841   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2842   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2843   B->preallocated  = PETSC_TRUE;
2844   B->was_assembled = PETSC_FALSE;
2845   B->assembled = PETSC_FALSE;
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2850 {
2851   Mat            mat;
2852   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2853   PetscErrorCode ierr;
2854 
2855   PetscFunctionBegin;
2856   *newmat = 0;
2857   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2858   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2859   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2860   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2861   a       = (Mat_MPIAIJ*)mat->data;
2862 
2863   mat->factortype   = matin->factortype;
2864   mat->assembled    = matin->assembled;
2865   mat->insertmode   = NOT_SET_VALUES;
2866   mat->preallocated = matin->preallocated;
2867 
2868   a->size         = oldmat->size;
2869   a->rank         = oldmat->rank;
2870   a->donotstash   = oldmat->donotstash;
2871   a->roworiented  = oldmat->roworiented;
2872   a->rowindices   = NULL;
2873   a->rowvalues    = NULL;
2874   a->getrowactive = PETSC_FALSE;
2875 
2876   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2877   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2878 
2879   if (oldmat->colmap) {
2880 #if defined(PETSC_USE_CTABLE)
2881     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2882 #else
2883     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2884     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2885     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2886 #endif
2887   } else a->colmap = NULL;
2888   if (oldmat->garray) {
2889     PetscInt len;
2890     len  = oldmat->B->cmap->n;
2891     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2892     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2893     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2894   } else a->garray = NULL;
2895 
2896   /* MatDuplicate() may be called with a non-assembled matrix;
2897      in fact, MatDuplicate() only requires the matrix to be preallocated.
2898      This may happen, for example, inside DMCreateMatrix_Shell() */
2899   if (oldmat->lvec) {
2900     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2901     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2902   }
2903   if (oldmat->Mvctx) {
2904     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2905     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2906   }
2907   if (oldmat->Mvctx_mpi1) {
2908     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2909     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2910   }
2911 
2912   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2913   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2914   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2915   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2916   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2917   *newmat = mat;
2918   PetscFunctionReturn(0);
2919 }
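
/*
   Example: duplicating a preallocated (possibly not yet assembled) MATMPIAIJ,
   as handled by the routine above (a minimal sketch):

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ... use B independently of A ...
     ierr = MatDestroy(&B);CHKERRQ(ierr);

   MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN duplicate only the
   structure.
*/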
2920 
2921 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2922 {
2923   PetscBool      isbinary, ishdf5;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2928   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2929   /* force binary viewer to load .info file if it has not yet done so */
2930   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2931   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2932   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2933   if (isbinary) {
2934     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2935   } else if (ishdf5) {
2936 #if defined(PETSC_HAVE_HDF5)
2937     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2938 #else
2939     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2940 #endif
2941   } else {
2942     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2943   }
2944   PetscFunctionReturn(0);
2945 }
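
/*
   Example: loading a MATMPIAIJ from a PETSc binary file, which dispatches to
   MatLoad_MPIAIJ_Binary() below (a minimal sketch; matrix.dat is a placeholder
   file name):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/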
2946 
2947 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2948 {
2949   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2950   PetscInt       *rowidxs,*colidxs;
2951   PetscScalar    *matvals;
2952   PetscErrorCode ierr;
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2956 
2957   /* read in matrix header */
2958   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2959   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2960   M  = header[1]; N = header[2]; nz = header[3];
2961   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2962   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2963   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2964 
2965   /* set block sizes from the viewer's .info file */
2966   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2967   /* set global sizes if not set already */
2968   if (mat->rmap->N < 0) mat->rmap->N = M;
2969   if (mat->cmap->N < 0) mat->cmap->N = N;
2970   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2971   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2972 
2973   /* check if the matrix sizes are correct */
2974   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2975   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2976 
2977   /* read in row lengths and build row indices */
2978   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2979   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2980   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2981   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2982   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2983   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2984   /* read in column indices and matrix values */
2985   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2986   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2987   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2988   /* store matrix indices and values */
2989   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2990   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2991   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2992   PetscFunctionReturn(0);
2993 }
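
/*
   The binary layout consumed above is: a 4-entry header (MAT_FILE_CLASSID, M,
   N, nz), then the M row lengths, then the nz column indices, then the nz
   values. Files in this format are produced by viewing a matrix with a binary
   viewer (a minimal sketch; matrix.dat is a placeholder file name):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/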
2994 
2995 /* Not scalable because of ISAllGather() unless getting all columns. */
2996 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2997 {
2998   PetscErrorCode ierr;
2999   IS             iscol_local;
3000   PetscBool      isstride;
3001   PetscMPIInt    lisstride=0,gisstride;
3002 
3003   PetscFunctionBegin;
3004   /* check if we are grabbing all columns */
3005   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3006 
3007   if (isstride) {
3008     PetscInt  start,len,mstart,mlen;
3009     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3010     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3011     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3012     if (mstart == start && mlen-mstart == len) lisstride = 1;
3013   }
3014 
3015   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3016   if (gisstride) {
3017     PetscInt N;
3018     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3019     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3020     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3021     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3022   } else {
3023     PetscInt cbs;
3024     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3025     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3026     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3027   }
3028 
3029   *isseq = iscol_local;
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 /*
3034  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3035  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3036 
3037  Input Parameters:
3038    mat - matrix
3039    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3040            i.e., mat->rstart <= isrow[i] < mat->rend
3041    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3042            i.e., mat->cstart <= iscol[i] < mat->cend
3043  Output Parameter:
3044    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3045    iscol_o - sequential column index set for retrieving mat->B
3046    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3047  */
3048 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3049 {
3050   PetscErrorCode ierr;
3051   Vec            x,cmap;
3052   const PetscInt *is_idx;
3053   PetscScalar    *xarray,*cmaparray;
3054   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3055   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3056   Mat            B=a->B;
3057   Vec            lvec=a->lvec,lcmap;
3058   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3059   MPI_Comm       comm;
3060   VecScatter     Mvctx=a->Mvctx;
3061 
3062   PetscFunctionBegin;
3063   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3064   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3065 
3066   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3067   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3068   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3069   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3070   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3071 
3072   /* Get start indices */
3073   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3074   isstart -= ncols;
3075   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3076 
3077   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3078   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3079   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3080   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3081   for (i=0; i<ncols; i++) {
3082     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3083     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3084     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3085   }
3086   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3087   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3088   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3089 
3090   /* Get iscol_d */
3091   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3092   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3093   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3094 
3095   /* Get isrow_d */
3096   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3097   rstart = mat->rmap->rstart;
3098   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3099   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3100   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3101   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3102 
3103   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3104   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3105   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3106 
3107   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3108   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3109   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3110 
3111   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3112 
3113   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3114   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3115 
3116   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3117   /* off-process column indices */
3118   count = 0;
3119   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3120   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3121 
3122   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3123   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3124   for (i=0; i<Bn; i++) {
3125     if (PetscRealPart(xarray[i]) > -1.0) {
3126       idx[count]     = i;                   /* local column index in off-diagonal part B */
3127       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3128       count++;
3129     }
3130   }
3131   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3132   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3133 
3134   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3135   /* cannot ensure iscol_o has same blocksize as iscol! */
3136 
3137   ierr = PetscFree(idx);CHKERRQ(ierr);
3138   *garray = cmap1;
3139 
3140   ierr = VecDestroy(&x);CHKERRQ(ierr);
3141   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3142   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 
3146 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3147 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3148 {
3149   PetscErrorCode ierr;
3150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3151   Mat            M = NULL;
3152   MPI_Comm       comm;
3153   IS             iscol_d,isrow_d,iscol_o;
3154   Mat            Asub = NULL,Bsub = NULL;
3155   PetscInt       n;
3156 
3157   PetscFunctionBegin;
3158   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3159 
3160   if (call == MAT_REUSE_MATRIX) {
3161     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3162     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3163     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3164 
3165     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3166     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3167 
3168     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3169     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3170 
3171     /* Update diagonal and off-diagonal portions of submat */
3172     asub = (Mat_MPIAIJ*)(*submat)->data;
3173     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3174     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3175     if (n) {
3176       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3177     }
3178     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3179     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3180 
3181   } else { /* call == MAT_INITIAL_MATRIX */
3182     const PetscInt *garray;
3183     PetscInt        BsubN;
3184 
3185     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3186     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3187 
3188     /* Create local submatrices Asub and Bsub */
3189     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3190     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3191 
3192     /* Create submatrix M */
3193     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3194 
3195     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3196     asub = (Mat_MPIAIJ*)M->data;
3197 
3198     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3199     n = asub->B->cmap->N;
3200     if (BsubN > n) {
3201       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3202       const PetscInt *idx;
3203       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3204       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3205 
3206       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3207       j = 0;
3208       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3209       for (i=0; i<n; i++) {
3210         if (j >= BsubN) break;
3211         while (subgarray[i] > garray[j]) j++;
3212 
3213         if (subgarray[i] == garray[j]) {
3214           idx_new[i] = idx[j++];
3215         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3216       }
3217       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3218 
3219       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3220       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3221 
3222     } else if (BsubN < n) {
3223       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3224     }
3225 
3226     ierr = PetscFree(garray);CHKERRQ(ierr);
3227     *submat = M;
3228 
3229     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3230     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3231     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3232 
3233     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3234     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3235 
3236     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3237     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3238   }
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3243 {
3244   PetscErrorCode ierr;
3245   IS             iscol_local=NULL,isrow_d;
3246   PetscInt       csize;
3247   PetscInt       n,i,j,start,end;
3248   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3249   MPI_Comm       comm;
3250 
3251   PetscFunctionBegin;
3252   /* If isrow has same processor distribution as mat,
3253      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3254   if (call == MAT_REUSE_MATRIX) {
3255     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3256     if (isrow_d) {
3257       sameRowDist  = PETSC_TRUE;
3258       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3259     } else {
3260       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3261       if (iscol_local) {
3262         sameRowDist  = PETSC_TRUE;
3263         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3264       }
3265     }
3266   } else {
3267     /* Check if isrow has same processor distribution as mat */
3268     sameDist[0] = PETSC_FALSE;
3269     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3270     if (!n) {
3271       sameDist[0] = PETSC_TRUE;
3272     } else {
3273       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3274       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3275       if (i >= start && j < end) {
3276         sameDist[0] = PETSC_TRUE;
3277       }
3278     }
3279 
3280     /* Check if iscol has same processor distribution as mat */
3281     sameDist[1] = PETSC_FALSE;
3282     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3283     if (!n) {
3284       sameDist[1] = PETSC_TRUE;
3285     } else {
3286       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3287       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3288       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3289     }
3290 
3291     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3292     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3293     sameRowDist = tsameDist[0];
3294   }
3295 
3296   if (sameRowDist) {
3297     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3298       /* isrow and iscol have same processor distribution as mat */
3299       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3300       PetscFunctionReturn(0);
3301     } else { /* sameRowDist */
3302       /* isrow has same processor distribution as mat */
3303       if (call == MAT_INITIAL_MATRIX) {
3304         PetscBool sorted;
3305         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3306         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3307         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3308         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3309 
3310         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3311         if (sorted) {
3312           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3313           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3314           PetscFunctionReturn(0);
3315         }
3316       } else { /* call == MAT_REUSE_MATRIX */
3317         IS    iscol_sub;
3318         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3319         if (iscol_sub) {
3320           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3321           PetscFunctionReturn(0);
3322         }
3323       }
3324     }
3325   }
3326 
3327   /* General case: iscol -> iscol_local which has global size of iscol */
3328   if (call == MAT_REUSE_MATRIX) {
3329     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3330     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3331   } else {
3332     if (!iscol_local) {
3333       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3334     }
3335   }
3336 
3337   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3338   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3339 
3340   if (call == MAT_INITIAL_MATRIX) {
3341     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3342     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3343   }
3344   PetscFunctionReturn(0);
3345 }
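
/*
   Example: extracting a submatrix through the dispatcher above. When isrow and
   iscol match the parallel layout of mat, the SameRowColDist fast path is
   taken (a minimal sketch that pulls out the locally owned diagonal block of
   a square matrix):

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISDuplicate(isrow,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... refill later with the same index sets ...
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/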
3346 
3347 /*@C
3348      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3349          and "off-diagonal" part of the matrix in CSR format.
3350 
3351    Collective
3352 
3353    Input Parameters:
3354 +  comm - MPI communicator
3355 .  A - "diagonal" portion of matrix
3356 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3357 -  garray - global index of B columns
3358 
3359    Output Parameter:
3360 .   mat - the matrix, with input A as its local diagonal matrix

3361    Level: advanced
3362 
3363    Notes:
3364        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3365        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3366 
3367 .seealso: MatCreateMPIAIJWithSplitArrays()
3368 @*/
3369 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3370 {
3371   PetscErrorCode ierr;
3372   Mat_MPIAIJ     *maij;
3373   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3374   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3375   PetscScalar    *oa=b->a;
3376   Mat            Bnew;
3377   PetscInt       m,n,N;
3378 
3379   PetscFunctionBegin;
3380   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3381   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3382   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3383   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3384   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3385   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3386 
3387   /* Get global columns of mat */
3388   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3389 
3390   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3391   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3392   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3393   maij = (Mat_MPIAIJ*)(*mat)->data;
3394 
3395   (*mat)->preallocated = PETSC_TRUE;
3396 
3397   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3398   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3399 
3400   /* Set A as diagonal portion of *mat */
3401   maij->A = A;
3402 
3403   nz = oi[m];
3404   for (i=0; i<nz; i++) {
3405     col   = oj[i];
3406     oj[i] = garray[col];
3407   }
3408 
3409   /* Set Bnew as off-diagonal portion of *mat */
3410   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3411   bnew        = (Mat_SeqAIJ*)Bnew->data;
3412   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3413   maij->B     = Bnew;
3414 
3415   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3416 
3417   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3418   b->free_a       = PETSC_FALSE;
3419   b->free_ij      = PETSC_FALSE;
3420   ierr = MatDestroy(&B);CHKERRQ(ierr);
3421 
3422   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3423   bnew->free_a       = PETSC_TRUE;
3424   bnew->free_ij      = PETSC_TRUE;
3425 
3426   /* condense columns of maij->B */
3427   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3428   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3429   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3430   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3431   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3432   PetscFunctionReturn(0);
3433 }
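
/*
   Example: assembling an MPIAIJ from prebuilt sequential pieces via the
   routine above (a hedged sketch). Asub is the m x n "diagonal" SeqAIJ block
   owned by this process, Bsub is an m x k SeqAIJ holding the off-process
   part, and garray is the length-k array of global column indices for Bsub's
   columns:

     Mat M;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   After the call Asub is owned by M and Bsub has been destroyed; neither may
   be used again by the caller.
*/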
3434 
3435 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3436 
3437 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3438 {
3439   PetscErrorCode ierr;
3440   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3441   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3442   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3443   Mat            M,Msub,B=a->B;
3444   MatScalar      *aa;
3445   Mat_SeqAIJ     *aij;
3446   PetscInt       *garray = a->garray,*colsub,Ncols;
3447   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3448   IS             iscol_sub,iscmap;
3449   const PetscInt *is_idx,*cmap;
3450   PetscBool      allcolumns=PETSC_FALSE;
3451   MPI_Comm       comm;
3452 
3453   PetscFunctionBegin;
3454   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3455 
3456   if (call == MAT_REUSE_MATRIX) {
3457     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3458     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3459     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3460 
3461     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3462     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3463 
3464     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3465     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3466 
3467     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3468 
3469   } else { /* call == MAT_INITIAL_MATRIX */
3470     PetscBool flg;
3471 
3472     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3473     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3474 
3475     /* (1) iscol -> nonscalable iscol_local */
3476     /* Check for special case: each processor gets entire matrix columns */
3477     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3478     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3479     if (allcolumns) {
3480       iscol_sub = iscol_local;
3481       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3482       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3483 
3484     } else {
3485       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted; it may have duplicate indices */
3486       PetscInt *idx,*cmap1,k;
3487       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3488       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3489       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3490       count = 0;
3491       k     = 0;
3492       for (i=0; i<Ncols; i++) {
3493         j = is_idx[i];
3494         if (j >= cstart && j < cend) {
3495           /* diagonal part of mat */
3496           idx[count]     = j;
3497           cmap1[count++] = i; /* column index in submat */
3498         } else if (Bn) {
3499           /* off-diagonal part of mat */
3500           if (j == garray[k]) {
3501             idx[count]     = j;
3502             cmap1[count++] = i;  /* column index in submat */
3503           } else if (j > garray[k]) {
3504             while (j > garray[k] && k < Bn-1) k++;
3505             if (j == garray[k]) {
3506               idx[count]     = j;
3507               cmap1[count++] = i; /* column index in submat */
3508             }
3509           }
3510         }
3511       }
3512       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3513 
3514       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3515       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3516       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3517 
3518       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3519     }
3520 
3521     /* (3) Create sequential Msub */
3522     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3523   }
3524 
3525   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3526   aij  = (Mat_SeqAIJ*)(Msub)->data;
3527   ii   = aij->i;
3528   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3529 
3530   /*
3531       m - number of local rows
3532       Ncols - number of columns (same on all processors)
3533       rstart - first row in new global matrix generated
3534   */
3535   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3536 
3537   if (call == MAT_INITIAL_MATRIX) {
3538     /* (4) Create parallel newmat */
3539     PetscMPIInt    rank,size;
3540     PetscInt       csize;
3541 
3542     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3543     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3544 
3545     /*
3546         Determine the number of non-zeros in the diagonal and off-diagonal
3547         portions of the matrix in order to do correct preallocation
3548     */
3549 
3550     /* first get start and end of "diagonal" columns */
3551     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3552     if (csize == PETSC_DECIDE) {
3553       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3554       if (mglobal == Ncols) { /* square matrix */
3555         nlocal = m;
3556       } else {
3557         nlocal = Ncols/size + ((Ncols % size) > rank);
3558       }
3559     } else {
3560       nlocal = csize;
3561     }
3562     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3563     rstart = rend - nlocal;
3564     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3565 
3566     /* next, compute all the lengths */
3567     jj    = aij->j;
3568     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3569     olens = dlens + m;
3570     for (i=0; i<m; i++) {
3571       jend = ii[i+1] - ii[i];
3572       olen = 0;
3573       dlen = 0;
3574       for (j=0; j<jend; j++) {
3575         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3576         else dlen++;
3577         jj++;
3578       }
3579       olens[i] = olen;
3580       dlens[i] = dlen;
3581     }
3582 
3583     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3584     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3585 
3586     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3587     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3588     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3589     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3590     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3591     ierr = PetscFree(dlens);CHKERRQ(ierr);
3592 
3593   } else { /* call == MAT_REUSE_MATRIX */
3594     M    = *newmat;
3595     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3596     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3597     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3598     /*
3599          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3600        rather than the slower MatSetValues().
3601     */
3602     M->was_assembled = PETSC_TRUE;
3603     M->assembled     = PETSC_FALSE;
3604   }
3605 
3606   /* (5) Set values of Msub to *newmat */
3607   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3608   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3609 
3610   jj   = aij->j;
3611   aa   = aij->a;
3612   for (i=0; i<m; i++) {
3613     row = rstart + i;
3614     nz  = ii[i+1] - ii[i];
3615     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3616     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3617     jj += nz; aa += nz;
3618   }
3619   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3620 
3621   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623 
3624   ierr = PetscFree(colsub);CHKERRQ(ierr);
3625 
3626   /* save Msub, iscol_sub and iscmap used in processor for next request */
3627   if (call ==  MAT_INITIAL_MATRIX) {
3628     *newmat = M;
3629     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3630     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3631 
3632     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3633     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3634 
3635     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3636     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3637 
3638     if (iscol_local) {
3639       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3640       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3641     }
3642   }
3643   PetscFunctionReturn(0);
3644 }
3645 
3646 /*
3647     Not great since it makes two copies of the submatrix: first a SeqAIJ on
3648   each process, and then the end result by concatenating the local matrices.
3649   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3650 
3651   Note: This requires a sequential iscol with all indices.
3652 */
3653 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3654 {
3655   PetscErrorCode ierr;
3656   PetscMPIInt    rank,size;
3657   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3658   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3659   Mat            M,Mreuse;
3660   MatScalar      *aa,*vwork;
3661   MPI_Comm       comm;
3662   Mat_SeqAIJ     *aij;
3663   PetscBool      colflag,allcolumns=PETSC_FALSE;
3664 
3665   PetscFunctionBegin;
3666   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3667   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3668   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3669 
3670   /* Check for special case: each processor gets entire matrix columns */
3671   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3672   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3673   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3674 
3675   if (call ==  MAT_REUSE_MATRIX) {
3676     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3677     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3678     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3679   } else {
3680     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3681   }
3682 
3683   /*
3684       m - number of local rows
3685       n - number of columns (same on all processors)
3686       rstart - first row in new global matrix generated
3687   */
3688   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3689   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3690   if (call == MAT_INITIAL_MATRIX) {
3691     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3692     ii  = aij->i;
3693     jj  = aij->j;
3694 
3695     /*
3696         Determine the number of non-zeros in the diagonal and off-diagonal
3697         portions of the matrix in order to do correct preallocation
3698     */
3699 
3700     /* first get start and end of "diagonal" columns */
3701     if (csize == PETSC_DECIDE) {
3702       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3703       if (mglobal == n) { /* square matrix */
3704         nlocal = m;
3705       } else {
3706         nlocal = n/size + ((n % size) > rank);
3707       }
3708     } else {
3709       nlocal = csize;
3710     }
3711     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3712     rstart = rend - nlocal;
3713     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3714 
3715     /* next, compute all the lengths */
3716     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3717     olens = dlens + m;
3718     for (i=0; i<m; i++) {
3719       jend = ii[i+1] - ii[i];
3720       olen = 0;
3721       dlen = 0;
3722       for (j=0; j<jend; j++) {
3723         if (*jj < rstart || *jj >= rend) olen++;
3724         else dlen++;
3725         jj++;
3726       }
3727       olens[i] = olen;
3728       dlens[i] = dlen;
3729     }
3730     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3731     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3732     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3733     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3734     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3735     ierr = PetscFree(dlens);CHKERRQ(ierr);
3736   } else {
3737     PetscInt ml,nl;
3738 
3739     M    = *newmat;
3740     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3741     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3742     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3743     /*
3744          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3745        rather than the slower MatSetValues().
3746     */
3747     M->was_assembled = PETSC_TRUE;
3748     M->assembled     = PETSC_FALSE;
3749   }
3750   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3751   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3752   ii   = aij->i;
3753   jj   = aij->j;
3754   aa   = aij->a;
3755   for (i=0; i<m; i++) {
3756     row   = rstart + i;
3757     nz    = ii[i+1] - ii[i];
3758     cwork = jj;     jj += nz;
3759     vwork = aa;     aa += nz;
3760     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3761   }
3762 
3763   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   *newmat = M;
3766 
3767   /* save submatrix used in processor for next request */
3768   if (call ==  MAT_INITIAL_MATRIX) {
3769     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3770     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3771   }
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3776 {
3777   PetscInt       m,cstart, cend,j,nnz,i,d;
3778   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3779   const PetscInt *JJ;
3780   PetscErrorCode ierr;
3781   PetscBool      nooffprocentries;
3782 
3783   PetscFunctionBegin;
3784   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3785 
3786   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3787   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3788   m      = B->rmap->n;
3789   cstart = B->cmap->rstart;
3790   cend   = B->cmap->rend;
3791   rstart = B->rmap->rstart;
3792 
3793   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3794 
3795   if (PetscDefined(USE_DEBUG)) {
3796     for (i=0; i<m; i++) {
3797       nnz = Ii[i+1]- Ii[i];
3798       JJ  = J + Ii[i];
3799       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3800       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3801       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (must be less than %D)",i,JJ[nnz-1],B->cmap->N);
3802     }
3803   }
3804 
3805   for (i=0; i<m; i++) {
3806     nnz     = Ii[i+1]- Ii[i];
3807     JJ      = J + Ii[i];
3808     nnz_max = PetscMax(nnz_max,nnz);
3809     d       = 0;
3810     for (j=0; j<nnz; j++) {
3811       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3812     }
3813     d_nnz[i] = d;
3814     o_nnz[i] = nnz - d;
3815   }
3816   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3817   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3818 
3819   for (i=0; i<m; i++) {
3820     ii   = i + rstart;
3821     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3822   }
3823   nooffprocentries    = B->nooffprocentries;
3824   B->nooffprocentries = PETSC_TRUE;
3825   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3826   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3827   B->nooffprocentries = nooffprocentries;
3828 
3829   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 /*@
3834    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3835    (the default parallel PETSc format).
3836 
3837    Collective
3838 
3839    Input Parameters:
3840 +  B - the matrix
3841 .  i - the indices into j for the start of each local row (starts with zero)
3842 .  j - the column indices for each local row (starts with zero)
3843 -  v - optional values in the matrix
3844 
3845    Level: developer
3846 
3847    Notes:
3848        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3849      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3850      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3851 
3852        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3853 
3854        The format used for the sparse matrix input is equivalent to a
3855     row-major ordering, i.e., for the following matrix the expected input data is
3856     as shown below:
3857 
3858 $        1 0 0
3859 $        2 0 3     P0
3860 $       -------
3861 $        4 5 6     P1
3862 $
3863 $     Process0 [P0]: rows_owned=[0,1]
3864 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3865 $        j =  {0,0,2}  [size = 3]
3866 $        v =  {1,2,3}  [size = 3]
3867 $
3868 $     Process1 [P1]: rows_owned=[2]
3869 $        i =  {0,3}    [size = nrow+1  = 1+1]
3870 $        j =  {0,1,2}  [size = 3]
3871 $        v =  {4,5,6}  [size = 3]
3872 
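       As a sketch, process P0 in the example above could set up its part as follows;
    the array values are the illustrative ones shown, and each rank passes only its own
    local CSR arrays:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);      /* P0 owns rows 0 and 1 of the 3x3 example */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve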
3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3874           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3875 @*/
3876 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3877 {
3878   PetscErrorCode ierr;
3879 
3880   PetscFunctionBegin;
3881   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3882   PetscFunctionReturn(0);
3883 }
3884 
3885 /*@C
3886    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3887    (the default parallel PETSc format).  For good matrix assembly performance
3888    the user should preallocate the matrix storage by setting the parameters
3889    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3890    performance can be increased by more than a factor of 50.
3891 
3892    Collective
3893 
3894    Input Parameters:
3895 +  B - the matrix
3896 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3897            (same value is used for all local rows)
3898 .  d_nnz - array containing the number of nonzeros in the various rows of the
3899            DIAGONAL portion of the local submatrix (possibly different for each row)
3900            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3901            The size of this array is equal to the number of local rows, i.e 'm'.
3902            For matrices that will be factored, you must leave room for (and set)
3903            the diagonal entry even if it is zero.
3904 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3905            submatrix (same value is used for all local rows).
3906 -  o_nnz - array containing the number of nonzeros in the various rows of the
3907            OFF-DIAGONAL portion of the local submatrix (possibly different for
3908            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3909            structure. The size of this array is equal to the number
3910            of local rows, i.e 'm'.
3911 
3912    If the *_nnz parameter is given then the *_nz parameter is ignored
3913 
3914    The AIJ format (also called the Yale sparse matrix format or
3915    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3916    storage.  The stored row and column indices begin with zero.
3917    See Users-Manual: ch_mat for details.
3918 
3919    The parallel matrix is partitioned such that the first m0 rows belong to
3920    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3921    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3922 
3923    The DIAGONAL portion of the local submatrix of a processor can be defined
3924    as the submatrix which is obtained by extracting the part corresponding to
3925    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3926    first row that belongs to the processor, r2 is the last row belonging to
3927    this processor, and c1-c2 is the range of indices of the local part of a
3928    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3929    common case of a square matrix, the row and column ranges are the same and
3930    the DIAGONAL part is also square. The remaining portion of the local
3931    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3932 
3933    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3934 
3935    You can call MatGetInfo() to get information on how effective the preallocation was;
3936    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3937    You can also run with the option -info and look for messages with the string
3938    malloc in them to see if additional memory allocation was needed.
3939 
3940    Example usage:
3941 
3942    Consider the following 8x8 matrix with 34 non-zero values, that is
3943    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3944    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3945    as follows:
3946 
3947 .vb
3948             1  2  0  |  0  3  0  |  0  4
3949     Proc0   0  5  6  |  7  0  0  |  8  0
3950             9  0 10  | 11  0  0  | 12  0
3951     -------------------------------------
3952            13  0 14  | 15 16 17  |  0  0
3953     Proc1   0 18  0  | 19 20 21  |  0  0
3954             0  0  0  | 22 23  0  | 24  0
3955     -------------------------------------
3956     Proc2  25 26 27  |  0  0 28  | 29  0
3957            30  0  0  | 31 32 33  |  0 34
3958 .ve
3959 
3960    This can be represented as a collection of submatrices as:
3961 
3962 .vb
3963       A B C
3964       D E F
3965       G H I
3966 .ve
3967 
3968    Where the submatrices A,B,C are owned by proc0, D,E,F are
3969    owned by proc1, G,H,I are owned by proc2.
3970 
3971    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3972    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3973    The 'M','N' parameters are 8,8, and have the same values on all procs.
3974 
3975    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3976    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3977    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3978    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3979    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3980    matrix, and [DF] as another SeqAIJ matrix.
3981 
3982    When d_nz, o_nz parameters are specified, d_nz storage elements are
3983    allocated for every row of the local diagonal submatrix, and o_nz
3984    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3985    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3986    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3987    In this case, the values of d_nz,o_nz are:
3988 .vb
3989      proc0 : d_nz = 2, o_nz = 2
3990      proc1 : d_nz = 3, o_nz = 2
3991      proc2 : d_nz = 1, o_nz = 4
3992 .ve
3993    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3994    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3995    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3996    34 values.
3997 
3998    When d_nnz, o_nnz parameters are specified, the storage is specified
3999    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4000    In the above case the values for d_nnz,o_nnz are:
4001 .vb
4002      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4003      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4004      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4005 .ve
4006    Here the space allocated is the sum of all the above values, i.e., 34, so
4007    the preallocation is exact.
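
   As a sketch, proc0 above could then make the per-row preallocation call as
   (B is the matrix being preallocated):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve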
4008 
4009    Level: intermediate
4010 
4011 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4012           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4013 @*/
4014 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4015 {
4016   PetscErrorCode ierr;
4017 
4018   PetscFunctionBegin;
4019   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4020   PetscValidType(B,1);
4021   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4022   PetscFunctionReturn(0);
4023 }
4024 
4025 /*@
4026      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4027          rows in standard CSR format.
4028 
4029    Collective
4030 
4031    Input Parameters:
4032 +  comm - MPI communicator
4033 .  m - number of local rows (Cannot be PETSC_DECIDE)
4034 .  n - number of local columns; this should be the same as the local size used in creating
4035        the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4036        calculated if N is given). For square matrices n is almost always m.
4037 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4038 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4039 .   i - row indices; that is i[0] = 0 and i[row] = i[row-1] + number of elements in row row-1 of the matrix
4040 .   j - column indices
4041 -   a - matrix values
4042 
4043    Output Parameter:
4044 .   mat - the matrix
4045 
4046    Level: intermediate
4047 
4048    Notes:
4049        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4050      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4051      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4052 
4053        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4054 
4055        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4056 
4057        The format used for the sparse matrix input is equivalent to a row-major
4058     ordering, i.e., for the following matrix the expected input data is as shown below:
4060 
4061 $        1 0 0
4062 $        2 0 3     P0
4063 $       -------
4064 $        4 5 6     P1
4065 $
4066 $     Process0 [P0]: rows_owned=[0,1]
4067 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4068 $        j =  {0,0,2}  [size = 3]
4069 $        v =  {1,2,3}  [size = 3]
4070 $
4071 $     Process1 [P1]: rows_owned=[2]
4072 $        i =  {0,3}    [size = nrow+1  = 1+1]
4073 $        j =  {0,1,2}  [size = 3]
4074 $        v =  {4,5,6}  [size = 3]
4075 
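       As a sketch, process P0 above would make the call below with m = 2 local rows;
    P1 would pass its own arrays with m = 1:

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
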
4076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4077           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4078 @*/
4079 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4080 {
4081   PetscErrorCode ierr;
4082 
4083   PetscFunctionBegin;
4084   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4085   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4086   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4087   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4088   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4089   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4090   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4091   PetscFunctionReturn(0);
4092 }
4093 
4094 /*@
4095      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays in standard CSR format for the
4096          local rows. Only the numerical values are updated; the i and j arrays must be identical to those used at creation.
4097 
4098    Collective
4099 
4100    Input Parameters:
4101 +  mat - the matrix
4102 .  m - number of local rows (Cannot be PETSC_DECIDE)
4103 .  n - number of local columns; this should be the same as the local size used in creating
4104        the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4105        calculated if N is given). For square matrices n is almost always m.
4106 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4107 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4108 .  Ii - row indices; that is Ii[0] = 0 and Ii[row] = Ii[row-1] + number of elements in row row-1 of the matrix
4109 .  J - column indices
4110 -  v - matrix values
4111 
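   Notes:
   A typical reuse pattern is sketched below; the Ii and J arrays must describe exactly the
   same nonzero structure that was used when the matrix was created:

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&A);
     /* ... later, recompute the entries of v in place, keeping the same sparsity ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
.ve
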
4112    Level: intermediate
4113 
4114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4115           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4116 @*/
4117 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4118 {
4119   PetscErrorCode ierr;
4120   PetscInt       cstart,nnz,i,j;
4121   PetscInt       *ld;
4122   PetscBool      nooffprocentries;
4123   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4124   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4125   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4126   const PetscInt *Adi = Ad->i;
4127   PetscInt       ldi,Iii,md;
4128 
4129   PetscFunctionBegin;
4130   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4131   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4132   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4133   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4134 
4135   cstart = mat->cmap->rstart;
4136   if (!Aij->ld) {
4137     /* count number of entries below block diagonal */
4138     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4139     Aij->ld = ld;
4140     for (i=0; i<m; i++) {
4141       nnz  = Ii[i+1]- Ii[i];
4142       j     = 0;
4143       while (j < nnz && J[j] < cstart) {j++;} /* test j first so J is never indexed past the row */
4144       J    += nnz;
4145       ld[i] = j;
4146     }
4147   } else {
4148     ld = Aij->ld;
4149   }
4150 
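  /* Each (sorted) input CSR row has ld[i] entries to the left of this rank's diagonal
     block, md entries inside it, and the remainder to its right; copy the three pieces
     into the off-diagonal (ao) and diagonal (ad) value arrays accordingly */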
4151   for (i=0; i<m; i++) {
4152     nnz  = Ii[i+1]- Ii[i];
4153     Iii  = Ii[i];
4154     ldi  = ld[i];
4155     md   = Adi[i+1]-Adi[i];
4156     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4157     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4158     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4159     ad  += md;
4160     ao  += nnz - md;
4161   }
4162   nooffprocentries      = mat->nooffprocentries;
4163   mat->nooffprocentries = PETSC_TRUE;
4164   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4165   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4166   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4167   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4168   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4169   mat->nooffprocentries = nooffprocentries;
4170   PetscFunctionReturn(0);
4171 }
4172 
4173 /*@C
4174    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4175    (the default parallel PETSc format).  For good matrix assembly performance
4176    the user should preallocate the matrix storage by setting the parameters
4177    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4178    performance can be increased by more than a factor of 50.
4179 
4180    Collective
4181 
4182    Input Parameters:
4183 +  comm - MPI communicator
4184 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4185            This value should be the same as the local size used in creating the
4186            y vector for the matrix-vector product y = Ax.
4187 .  n - number of local columns; this should be the same as the local size used in creating
4188        the x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4189        calculated if N is given). For square matrices n is almost always m.
4190 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4191 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4192 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4193            (same value is used for all local rows)
4194 .  d_nnz - array containing the number of nonzeros in the various rows of the
4195            DIAGONAL portion of the local submatrix (possibly different for each row)
4196            or NULL, if d_nz is used to specify the nonzero structure.
4197            The size of this array is equal to the number of local rows, i.e 'm'.
4198 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4199            submatrix (same value is used for all local rows).
4200 -  o_nnz - array containing the number of nonzeros in the various rows of the
4201            OFF-DIAGONAL portion of the local submatrix (possibly different for
4202            each row) or NULL, if o_nz is used to specify the nonzero
4203            structure. The size of this array is equal to the number
4204            of local rows, i.e 'm'.
4205 
4206    Output Parameter:
4207 .  A - the matrix
4208 
4209    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4210    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4211    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4212 
4213    Notes:
4214    If the *_nnz parameter is given then the *_nz parameter is ignored
4215 
4216    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4217    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4218    storage requirements for this matrix.
4219 
4220    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4221    processor then it must be used on all processors that share the object for
4222    that argument.
4223 
4224    The user MUST specify either the local or global matrix dimensions
4225    (possibly both).
4226 
4227    The parallel matrix is partitioned across processors such that the
4228    first m0 rows belong to process 0, the next m1 rows belong to
4229    process 1, the next m2 rows belong to process 2, etc., where
4230    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4231    values corresponding to an [m x N] submatrix.
4232 
4233    The columns are logically partitioned with the n0 columns belonging
4234    to the 0th partition, the next n1 columns belonging to the next
4235    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4236 
4237    The DIAGONAL portion of the local submatrix on any given processor
4238    is the submatrix corresponding to the rows and columns m,n
4239    owned by the given processor, i.e., the diagonal submatrix on
4240    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4241    etc. The remaining portion of the local submatrix [m x (N-n)]
4242    constitutes the OFF-DIAGONAL portion. The example below better
4243    illustrates this concept.
4244 
4245    For a square global matrix we define each processor's diagonal portion
4246    to be its local rows and the corresponding columns (a square submatrix);
4247    each processor's off-diagonal portion encompasses the remainder of the
4248    local matrix (a rectangular submatrix).
4249 
4250    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4251 
4252    When calling this routine with a single process communicator, a matrix of
4253    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4254    type of communicator, use the construction mechanism
4255 .vb
4256      MatCreate(...,&A);
4257      MatSetType(A,MATMPIAIJ);
4258      MatSetSizes(A, m,n,M,N);
4259      MatMPIAIJSetPreallocation(A,...);
4260 .ve
4263 
4264    By default, this format uses inodes (identical nodes) when possible.
4265    We search for consecutive rows with the same nonzero structure, thereby
4266    reusing matrix information to achieve increased efficiency.
4267 
4268    Options Database Keys:
4269 +  -mat_no_inode  - Do not use inodes
4270 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4271 
4274    Example usage:
4275 
4276    Consider the following 8x8 matrix with 34 non-zero values, that is
4277    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4278    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4279    as follows:
4280 
4281 .vb
4282             1  2  0  |  0  3  0  |  0  4
4283     Proc0   0  5  6  |  7  0  0  |  8  0
4284             9  0 10  | 11  0  0  | 12  0
4285     -------------------------------------
4286            13  0 14  | 15 16 17  |  0  0
4287     Proc1   0 18  0  | 19 20 21  |  0  0
4288             0  0  0  | 22 23  0  | 24  0
4289     -------------------------------------
4290     Proc2  25 26 27  |  0  0 28  | 29  0
4291            30  0  0  | 31 32 33  |  0 34
4292 .ve
4293 
4294    This can be represented as a collection of submatrices as:
4295 
4296 .vb
4297       A B C
4298       D E F
4299       G H I
4300 .ve
4301 
4302    Where the submatrices A,B,C are owned by proc0, D,E,F are
4303    owned by proc1, G,H,I are owned by proc2.
4304 
4305    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4306    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4307    The 'M','N' parameters are 8,8, and have the same values on all procs.
4308 
4309    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4310    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4311    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4312    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4313    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4314    matrix, and [DF] as another SeqAIJ matrix.
4315 
4316    When d_nz, o_nz parameters are specified, d_nz storage elements are
4317    allocated for every row of the local diagonal submatrix, and o_nz
4318    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4319    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4320    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4321    In this case, the values of d_nz,o_nz are:
4322 .vb
4323      proc0 : d_nz = 2, o_nz = 2
4324      proc1 : d_nz = 3, o_nz = 2
4325      proc2 : d_nz = 1, o_nz = 4
4326 .ve
4327    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4328    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4329    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4330    34 values.
4331 
4332    When d_nnz, o_nnz parameters are specified, the storage is specified
4333    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4334    In the above case the values for d_nnz,o_nnz are:
4335 .vb
4336      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4337      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4338      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4339 .ve
4340    Here the space allocated is the sum of all the above values, i.e., 34, so
4341    the preallocation is exact.
4342 
4343    Level: intermediate
4344 
4345 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4346           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4347 @*/
4348 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4349 {
4350   PetscErrorCode ierr;
4351   PetscMPIInt    size;
4352 
4353   PetscFunctionBegin;
4354   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4355   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4356   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4357   if (size > 1) {
4358     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4359     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4360   } else {
4361     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4362     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4363   }
4364   PetscFunctionReturn(0);
4365 }
4366 
4367 /*@C
4368   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4369 
4370   Not collective
4371 
4372   Input Parameter:
4373 . A - The MPIAIJ matrix
4374 
4375   Output Parameters:
4376 + Ad - The local diagonal block as a SeqAIJ matrix
4377 . Ao - The local off-diagonal block as a SeqAIJ matrix
4378 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4379 
4380   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4381   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4382   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4383   local column numbers to global column numbers in the original matrix.
4384 
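  A minimal access sketch (Ad, Ao, and colmap are caller-declared):

.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;

    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
    /* the global column of local column c of Ao is colmap[c] */
.ve
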
4385   Level: intermediate
4386 
4387 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4388 @*/
4389 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4390 {
4391   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4392   PetscBool      flg;
4393   PetscErrorCode ierr;
4394 
4395   PetscFunctionBegin;
4396   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4397   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4398   if (Ad)     *Ad     = a->A;
4399   if (Ao)     *Ao     = a->B;
4400   if (colmap) *colmap = a->garray;
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4405 {
4406   PetscErrorCode ierr;
4407   PetscInt       m,N,i,rstart,nnz,Ii;
4408   PetscInt       *indx;
4409   PetscScalar    *values;
4410 
4411   PetscFunctionBegin;
4412   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4413   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4414     PetscInt       *dnz,*onz,sum,bs,cbs;
4415 
4416     if (n == PETSC_DECIDE) {
4417       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4418     }
4419     /* Check sum(n) = N */
4420     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4421     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4422 
4423     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4424     rstart -= m;
4425 
4426     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4427     for (i=0; i<m; i++) {
4428       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4429       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4430       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4431     }
4432 
4433     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4434     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4435     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4436     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4437     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4438     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4439     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4440     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4441   }
4442 
4443   /* numeric phase */
4444   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4445   for (i=0; i<m; i++) {
4446     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4447     Ii   = i + rstart;
4448     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4449     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4450   }
4451   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4452   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4453   PetscFunctionReturn(0);
4454 }
4455 
4456 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4457 {
4458   PetscErrorCode    ierr;
4459   PetscMPIInt       rank;
4460   PetscInt          m,N,i,rstart,nnz;
4461   size_t            len;
4462   const PetscInt    *indx;
4463   PetscViewer       out;
4464   char              *name;
4465   Mat               B;
4466   const PetscScalar *values;
4467 
4468   PetscFunctionBegin;
4469   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4470   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4471   /* Should this be the type of the diagonal block of A? */
4472   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4473   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4474   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4475   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4476   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4477   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4478   for (i=0; i<m; i++) {
4479     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4480     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4481     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4482   }
4483   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4484   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4485 
4486   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4487   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4488   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* leave room for ".<rank>" even with many ranks */
4489   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
4490   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4491   ierr = PetscFree(name);CHKERRQ(ierr);
4492   ierr = MatView(B,out);CHKERRQ(ierr);
4493   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4494   ierr = MatDestroy(&B);CHKERRQ(ierr);
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4499 {
4500   PetscErrorCode      ierr;
4501   Mat_Merge_SeqsToMPI *merge;
4502   PetscContainer      container;
4503 
4504   PetscFunctionBegin;
4505   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4506   if (container) {
4507     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4508     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4510     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4511     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4513     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4514     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4515     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4516     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4517     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4518     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4519     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4520     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4521     ierr = PetscFree(merge);CHKERRQ(ierr);
4522     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4523   }
4524   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4525   PetscFunctionReturn(0);
4526 }
4527 
4528 #include <../src/mat/utils/freespace.h>
4529 #include <petscbt.h>
4530 
4531 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4532 {
4533   PetscErrorCode      ierr;
4534   MPI_Comm            comm;
4535   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4536   PetscMPIInt         size,rank,taga,*len_s;
4537   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4538   PetscInt            proc,m;
4539   PetscInt            **buf_ri,**buf_rj;
4540   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4541   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4542   MPI_Request         *s_waits,*r_waits;
4543   MPI_Status          *status;
4544   MatScalar           *aa=a->a;
4545   MatScalar           **abuf_r,*ba_i;
4546   Mat_Merge_SeqsToMPI *merge;
4547   PetscContainer      container;
4548 
4549   PetscFunctionBegin;
4550   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4551   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4552 
4553   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4554   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4555 
4556   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4557   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4558 
4559   bi     = merge->bi;
4560   bj     = merge->bj;
4561   buf_ri = merge->buf_ri;
4562   buf_rj = merge->buf_rj;
4563 
4564   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4565   owners = merge->rowmap->range;
4566   len_s  = merge->len_s;
4567 
4568   /* send and recv matrix values */
4569   /*-----------------------------*/
4570   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4571   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4572 
4573   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4574   for (proc=0,k=0; proc<size; proc++) {
4575     if (!len_s[proc]) continue;
4576     i    = owners[proc];
4577     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4578     k++;
4579   }
4580 
4581   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4582   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4583   ierr = PetscFree(status);CHKERRQ(ierr);
4584 
4585   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4586   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4587 
4588   /* insert mat values of mpimat */
4589   /*----------------------------*/
4590   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4591   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4592 
4593   for (k=0; k<merge->nrecv; k++) {
4594     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4595     nrows       = *(buf_ri_k[k]);
4596     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4597     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4598   }
4599 
4600   /* set values of ba */
4601   m = merge->rowmap->n;
4602   for (i=0; i<m; i++) {
4603     arow = owners[rank] + i;
4604     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4605     bnzi = bi[i+1] - bi[i];
4606     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4607 
4608     /* add local non-zero vals of this proc's seqmat into ba */
4609     anzi   = ai[arow+1] - ai[arow];
4610     aj     = a->j + ai[arow];
4611     aa     = a->a + ai[arow];
4612     nextaj = 0;
4613     for (j=0; nextaj<anzi; j++) {
4614       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4615         ba_i[j] += aa[nextaj++];
4616       }
4617     }
4618 
4619     /* add received vals into ba */
4620     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4621       /* i-th row */
4622       if (i == *nextrow[k]) {
4623         anzi   = *(nextai[k]+1) - *nextai[k];
4624         aj     = buf_rj[k] + *(nextai[k]);
4625         aa     = abuf_r[k] + *(nextai[k]);
4626         nextaj = 0;
4627         for (j=0; nextaj<anzi; j++) {
4628           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4629             ba_i[j] += aa[nextaj++];
4630           }
4631         }
4632         nextrow[k]++; nextai[k]++;
4633       }
4634     }
4635     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4636   }
4637   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4638   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4639 
4640   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4641   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4642   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4643   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4644   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4645   PetscFunctionReturn(0);
4646 }
4647 
4648 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4649 {
4650   PetscErrorCode      ierr;
4651   Mat                 B_mpi;
4652   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4653   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4654   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4655   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4656   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4657   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4658   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4659   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4660   MPI_Status          *status;
4661   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4662   PetscBT             lnkbt;
4663   Mat_Merge_SeqsToMPI *merge;
4664   PetscContainer      container;
4665 
4666   PetscFunctionBegin;
4667   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4668 
4669   /* make sure it is a PETSc comm */
4670   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4672   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4673 
4674   ierr = PetscNew(&merge);CHKERRQ(ierr);
4675   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4676 
4677   /* determine row ownership */
4678   /*---------------------------------------------------------*/
4679   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4680   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4681   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4682   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4683   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4684   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4685   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4686 
4687   m      = merge->rowmap->n;
4688   owners = merge->rowmap->range;
4689 
4690   /* determine the number of messages to send, their lengths */
4691   /*---------------------------------------------------------*/
4692   len_s = merge->len_s;
4693 
4694   len          = 0; /* length of buf_si[] */
4695   merge->nsend = 0;
4696   for (proc=0; proc<size; proc++) {
4697     len_si[proc] = 0;
4698     if (proc == rank) {
4699       len_s[proc] = 0;
4700     } else {
4701       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4702       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4703     }
4704     if (len_s[proc]) {
4705       merge->nsend++;
4706       nrows = 0;
4707       for (i=owners[proc]; i<owners[proc+1]; i++) {
4708         if (ai[i+1] > ai[i]) nrows++;
4709       }
4710       len_si[proc] = 2*(nrows+1);
4711       len         += len_si[proc];
4712     }
4713   }
4714 
4715   /* determine the number and length of messages to receive for ij-structure */
4716   /*-------------------------------------------------------------------------*/
4717   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4718   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4719 
4720   /* post the Irecv of j-structure */
4721   /*-------------------------------*/
4722   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4723   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4724 
4725   /* post the Isend of j-structure */
4726   /*--------------------------------*/
4727   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4728 
4729   for (proc=0, k=0; proc<size; proc++) {
4730     if (!len_s[proc]) continue;
4731     i    = owners[proc];
4732     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4733     k++;
4734   }
4735 
4736   /* receives and sends of j-structure are complete */
4737   /*------------------------------------------------*/
4738   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4739   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4740 
4741   /* send and recv i-structure */
4742   /*---------------------------*/
4743   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4744   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4745 
4746   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4747   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4748   for (proc=0,k=0; proc<size; proc++) {
4749     if (!len_s[proc]) continue;
4750     /* form outgoing message for i-structure:
4751          buf_si[0]:                 nrows to be sent
4752                [1:nrows]:           row index (global)
4753                [nrows+1:2*nrows+1]: i-structure index
4754     */
4755     /*-------------------------------------------*/
4756     nrows       = len_si[proc]/2 - 1;
4757     buf_si_i    = buf_si + nrows+1;
4758     buf_si[0]   = nrows;
4759     buf_si_i[0] = 0;
4760     nrows       = 0;
4761     for (i=owners[proc]; i<owners[proc+1]; i++) {
4762       anzi = ai[i+1] - ai[i];
4763       if (anzi) {
4764         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4765         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4766         nrows++;
4767       }
4768     }
4769     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4770     k++;
4771     buf_si += len_si[proc];
4772   }
4773 
4774   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4775   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4776 
4777   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4778   for (i=0; i<merge->nrecv; i++) {
4779     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4780   }
4781 
4782   ierr = PetscFree(len_si);CHKERRQ(ierr);
4783   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4784   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4785   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4786   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4787   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4788   ierr = PetscFree(status);CHKERRQ(ierr);
4789 
4790   /* compute a local seq matrix in each processor */
4791   /*----------------------------------------------*/
4792   /* allocate bi array and free space for accumulating nonzero column info */
4793   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4794   bi[0] = 0;
4795 
4796   /* create and initialize a linked list */
4797   nlnk = N+1;
4798   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4799 
4800   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4801   len  = ai[owners[rank+1]] - ai[owners[rank]];
4802   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4803 
4804   current_space = free_space;
4805 
4806   /* determine symbolic info for each local row */
4807   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4808 
4809   for (k=0; k<merge->nrecv; k++) {
4810     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4811     nrows       = *buf_ri_k[k];
4812     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4813     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4814   }
4815 
4816   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4817   len  = 0;
4818   for (i=0; i<m; i++) {
4819     bnzi = 0;
4820     /* add local non-zero cols of this proc's seqmat into lnk */
4821     arow  = owners[rank] + i;
4822     anzi  = ai[arow+1] - ai[arow];
4823     aj    = a->j + ai[arow];
4824     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4825     bnzi += nlnk;
4826     /* add received col data into lnk */
4827     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4828       if (i == *nextrow[k]) { /* i-th row */
4829         anzi  = *(nextai[k]+1) - *nextai[k];
4830         aj    = buf_rj[k] + *nextai[k];
4831         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4832         bnzi += nlnk;
4833         nextrow[k]++; nextai[k]++;
4834       }
4835     }
4836     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4837 
4838     /* if free space is not available, make more free space */
4839     if (current_space->local_remaining<bnzi) {
4840       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4841       nspacedouble++;
4842     }
4843     /* copy data into free space, then initialize lnk */
4844     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4845     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4846 
4847     current_space->array           += bnzi;
4848     current_space->local_used      += bnzi;
4849     current_space->local_remaining -= bnzi;
4850 
4851     bi[i+1] = bi[i] + bnzi;
4852   }
4853 
4854   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4855 
4856   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4857   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4858   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4859 
4860   /* create symbolic parallel matrix B_mpi */
4861   /*---------------------------------------*/
4862   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4863   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4864   if (n==PETSC_DECIDE) {
4865     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4866   } else {
4867     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4868   }
4869   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4870   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4871   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4872   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4873   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4874 
4875   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4876   B_mpi->assembled    = PETSC_FALSE;
4877   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4878   merge->bi           = bi;
4879   merge->bj           = bj;
4880   merge->buf_ri       = buf_ri;
4881   merge->buf_rj       = buf_rj;
4882   merge->coi          = NULL;
4883   merge->coj          = NULL;
4884   merge->owners_co    = NULL;
4885 
4886   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4887 
4888   /* attach the supporting struct to B_mpi for reuse */
4889   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4890   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4891   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4892   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4893   *mpimat = B_mpi;
4894 
4895   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4896   PetscFunctionReturn(0);
4897 }
4898 
4899 /*@C
4900       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4901                  matrices from each processor
4902 
4903     Collective
4904 
4905    Input Parameters:
4906 +    comm - the communicator the parallel matrix will live on
4907 .    seqmat - the input sequential matrix (one per process)
4908 .    m - number of local rows (or PETSC_DECIDE)
4909 .    n - number of local columns (or PETSC_DECIDE)
4910 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4911 
4912    Output Parameter:
4913 .    mpimat - the parallel matrix generated
4914 
4915     Level: advanced
4916 
4917    Notes:
4918      The dimensions of the sequential matrix in each processor MUST be the same.
4919      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4920      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
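
     A typical call sequence is sketched below; the symbolic structure is built once with
     MAT_INITIAL_MATRIX and reused on later calls:

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve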
4921 @*/
4922 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4923 {
4924   PetscErrorCode ierr;
4925   PetscMPIInt    size;
4926 
4927   PetscFunctionBegin;
4928   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4929   if (size == 1) {
4930     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4931     if (scall == MAT_INITIAL_MATRIX) {
4932       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4933     } else {
4934       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4935     }
4936     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4937     PetscFunctionReturn(0);
4938   }
4939   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4940   if (scall == MAT_INITIAL_MATRIX) {
4941     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4942   }
4943   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4944   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 /*@
4949      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4950           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4951           with MatGetSize().
4952 
4953     Not Collective
4954 
4955    Input Parameters:
4956 +    A - the matrix
4957 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 
4959    Output Parameter:
4960 .    A_loc - the local sequential matrix generated
4961 
4962     Level: developer
4963 
4964    Notes:
4965      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4966      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4967      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4968      modify the values of the returned A_loc.
4969 
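     A typical usage sketch:

.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc); /* refresh after the values of A change */
     MatDestroy(&A_loc);
.ve
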
4970 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4971 
4972 @*/
4973 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4974 {
4975   PetscErrorCode ierr;
4976   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4977   Mat_SeqAIJ     *mat,*a,*b;
4978   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4979   MatScalar      *aa,*ba,*cam;
4980   PetscScalar    *ca;
4981   PetscMPIInt    size;
4982   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4983   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4984   PetscBool      match;
4985 
4986   PetscFunctionBegin;
4987   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4988   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4989   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4990   if (size == 1) {
4991     if (scall == MAT_INITIAL_MATRIX) {
4992       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4993       *A_loc = mpimat->A;
4994     } else if (scall == MAT_REUSE_MATRIX) {
4995       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4996     }
4997     PetscFunctionReturn(0);
4998   }
4999 
5000   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5001   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5002   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5003   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5004   aa = a->a; ba = b->a;
5005   if (scall == MAT_INITIAL_MATRIX) {
5006     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5007     ci[0] = 0;
5008     for (i=0; i<am; i++) {
5009       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5010     }
5011     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5012     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5013     k    = 0;
5014     for (i=0; i<am; i++) {
5015       ncols_o = bi[i+1] - bi[i];
5016       ncols_d = ai[i+1] - ai[i];
5017       /* off-diagonal portion of A */
5018       for (jo=0; jo<ncols_o; jo++) {
5019         col = cmap[*bj];
5020         if (col >= cstart) break;
5021         cj[k]   = col; bj++;
5022         ca[k++] = *ba++;
5023       }
5024       /* diagonal portion of A */
5025       for (j=0; j<ncols_d; j++) {
5026         cj[k]   = cstart + *aj++;
5027         ca[k++] = *aa++;
5028       }
5029       /* off-diagonal portion of A */
5030       for (j=jo; j<ncols_o; j++) {
5031         cj[k]   = cmap[*bj++];
5032         ca[k++] = *ba++;
5033       }
5034     }
5035     /* put together the new matrix */
5036     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5037     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5038     /* Since these are PETSc arrays, change flags to free them as necessary. */
5039     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5040     mat->free_a  = PETSC_TRUE;
5041     mat->free_ij = PETSC_TRUE;
5042     mat->nonew   = 0;
5043   } else if (scall == MAT_REUSE_MATRIX) {
5044     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5045     ci = mat->i; cj = mat->j; cam = mat->a;
5046     for (i=0; i<am; i++) {
5047       /* off-diagonal portion of A */
5048       ncols_o = bi[i+1] - bi[i];
5049       for (jo=0; jo<ncols_o; jo++) {
5050         col = cmap[*bj];
5051         if (col >= cstart) break;
5052         *cam++ = *ba++; bj++;
5053       }
5054       /* diagonal portion of A */
5055       ncols_d = ai[i+1] - ai[i];
5056       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5057       /* off-diagonal portion of A */
5058       for (j=jo; j<ncols_o; j++) {
5059         *cam++ = *ba++; bj++;
5060       }
5061     }
5062   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5063   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5064   PetscFunctionReturn(0);
5065 }
5066 
5067 /*@C
5068      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5069 
5070     Not Collective
5071 
5072    Input Parameters:
5073 +    A - the matrix
5074 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5075 -    row, col - index sets of rows and columns to extract (or NULL)
5076 
5077    Output Parameter:
5078 .    A_loc - the local sequential matrix generated
5079 
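   Example usage, a minimal sketch (assuming A is an assembled MATMPIAIJ matrix; NULL is passed for row and col so all local rows and the nonzero columns are taken):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
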
5080     Level: developer
5081 
5082 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5083 
5084 @*/
5085 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5086 {
5087   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5088   PetscErrorCode ierr;
5089   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5090   IS             isrowa,iscola;
5091   Mat            *aloc;
5092   PetscBool      match;
5093 
5094   PetscFunctionBegin;
5095   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5096   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5097   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5098   if (!row) {
5099     start = A->rmap->rstart; end = A->rmap->rend;
5100     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5101   } else {
5102     isrowa = *row;
5103   }
5104   if (!col) {
5105     start = A->cmap->rstart;
5106     cmap  = a->garray;
5107     nzA   = a->A->cmap->n;
5108     nzB   = a->B->cmap->n;
5109     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5110     ncols = 0;
5111     for (i=0; i<nzB; i++) {
5112       if (cmap[i] < start) idx[ncols++] = cmap[i];
5113       else break;
5114     }
5115     imark = i;
5116     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5117     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5118     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5119   } else {
5120     iscola = *col;
5121   }
5122   if (scall != MAT_INITIAL_MATRIX) {
5123     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5124     aloc[0] = *A_loc;
5125   }
5126   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5127   if (!col) { /* attach global id of condensed columns */
5128     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5129   }
5130   *A_loc = aloc[0];
5131   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5132   if (!row) {
5133     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5134   }
5135   if (!col) {
5136     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5137   }
5138   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5139   PetscFunctionReturn(0);
5140 }
5141 
5142 /*
5143  * Destroy a matrix that may have PetscSF communication objects composed with it.
5144  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5145  * */
5146 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5147 {
5148   PetscSF          sf,osf;
5149   IS               map;
5150   PetscErrorCode   ierr;
5151 
5152   PetscFunctionBegin;
5153   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5154   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5155   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5156   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5157   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5158   ierr = ISDestroy(&map);CHKERRQ(ierr);
5159   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5160   PetscFunctionReturn(0);
5161 }
5162 
5163 /*
5164  * Create a sequential AIJ matrix based on row indices: all the columns of a row are extracted once the row is matched.
5165  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5166  * on a global size.
5167  * */
5168 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5169 {
5170   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5171   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5172   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5173   PetscMPIInt              owner;
5174   PetscSFNode              *iremote,*oiremote;
5175   const PetscInt           *lrowindices;
5176   PetscErrorCode           ierr;
5177   PetscSF                  sf,osf;
5178   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5179   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5180   MPI_Comm                 comm;
5181   ISLocalToGlobalMapping   mapping;
5182 
5183   PetscFunctionBegin;
5184   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5185   /* plocalsize is the number of roots
5186    * nrows is the number of leaves
5187    * */
5188   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5189   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5190   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5191   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5192   for (i=0;i<nrows;i++) {
5193     /* Find a remote index and an owner for a row
5194      * The row could be local or remote
5195      * */
5196     owner = 0;
5197     lidx  = 0;
5198     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5199     iremote[i].index = lidx;
5200     iremote[i].rank  = owner;
5201   }
5202   /* Create an SF to communicate how many nonzero columns there are for each row */
5203   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5204   /* SF will figure out the number of nonzero columns for each row, and their
5205    * offsets
5206    * */
5207   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5208   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5209   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5210 
5211   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5212   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5213   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5214   roffsets[0] = 0;
5215   roffsets[1] = 0;
5216   for (i=0;i<plocalsize;i++) {
5217     /* diag */
5218     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5219     /* off diag */
5220     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5221     /* compute offsets so that we know the relative location of each row */
5222     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5223     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5224   }
5225   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5226   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5227   /* 'r' means root, and 'l' means leaf */
5228   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5229   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5230   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5231   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5232   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5233   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5234   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5235   dntotalcols = 0;
5236   ontotalcols = 0;
5237   ncol = 0;
5238   for (i=0;i<nrows;i++) {
5239     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5240     ncol = PetscMax(pnnz[i],ncol);
5241     /* diag */
5242     dntotalcols += nlcols[i*2+0];
5243     /* off diag */
5244     ontotalcols += nlcols[i*2+1];
5245   }
5246   /* We do not need to figure out the right number of columns
5247    * since all the calculations will be done by going through the raw data
5248    * */
5249   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5250   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5251   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5252   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5253   /* diag */
5254   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5255   /* off diag */
5256   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5257   /* diag */
5258   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5259   /* off diag */
5260   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5261   dntotalcols = 0;
5262   ontotalcols = 0;
5263   ntotalcols  = 0;
5264   for (i=0;i<nrows;i++) {
5265     owner = 0;
5266     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5267     /* Set iremote for diag matrix */
5268     for (j=0;j<nlcols[i*2+0];j++) {
5269       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5270       iremote[dntotalcols].rank    = owner;
5271       /* P_oth is SeqAIJ so ilocal needs to point to the first part of memory */
5272       ilocal[dntotalcols++]        = ntotalcols++;
5273     }
5274     /* off diag */
5275     for (j=0;j<nlcols[i*2+1];j++) {
5276       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5277       oiremote[ontotalcols].rank    = owner;
5278       oilocal[ontotalcols++]        = ntotalcols++;
5279     }
5280   }
5281   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5282   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5283   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5284   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5285   /* P serves as the roots and P_oth as the leaves
5286    * Diag matrix
5287    * */
5288   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5289   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5290   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5291 
5292   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5293   /* Off diag */
5294   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5295   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5296   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5297   /* We operate on the matrix internal data to save memory */
5298   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5299   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5300   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5301   /* Convert to global indices for diag matrix */
5302   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5303   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5304   /* We want P_oth to store global indices */
5305   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5306   /* Use memory scalable approach */
5307   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5308   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5309   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5310   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5311   /* Convert back to local indices */
5312   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5313   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5314   nout = 0;
5315   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5316   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5317   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5318   /* Exchange values */
5319   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5320   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5321   /* Stop PETSc from shrinking memory */
5322   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5323   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5324   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5325   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5326   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5327   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5328   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5329   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5330   PetscFunctionReturn(0);
5331 }
5332 
5333 /*
5334  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5335  * This supports MPIAIJ and MAIJ matrices.
5336  * */
5337 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5338 {
5339   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5340   Mat_SeqAIJ            *p_oth;
5341   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5342   IS                    rows,map;
5343   PetscHMapI            hamp;
5344   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5345   MPI_Comm              comm;
5346   PetscSF               sf,osf;
5347   PetscBool             has;
5348   PetscErrorCode        ierr;
5349 
5350   PetscFunctionBegin;
5351   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5352   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5353   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5354    *  and then create a submatrix (that often is an overlapping matrix)
5355    * */
5356   if (reuse==MAT_INITIAL_MATRIX) {
5357     /* Use a hash table to figure out unique keys */
5358     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5359     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5360     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5361     count = 0;
5362     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5363     for (i=0;i<a->B->cmap->n;i++) {
5364       key  = a->garray[i]/dof;
5365       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5366       if (!has) {
5367         mapping[i] = count;
5368         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5369       } else {
5370         /* The current 'i' has the same value as the previous step */
5371         mapping[i] = count-1;
5372       }
5373     }
5374     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5375     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5376     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5377     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5378     off = 0;
5379     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5380     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5381     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5382     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5383     /* In case the matrix was already created but the user wants to recreate it */
5384     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5385     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5386     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5387     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5388   } else if (reuse==MAT_REUSE_MATRIX) {
5389     /* If the matrix was already created, we simply update its values using the SF objects
5390      * that were attached to the matrix earlier.
5391      *  */
5392     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5393     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5394     if (!sf || !osf) {
5395       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5396     }
5397     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5398     /* Update values in place */
5399     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5400     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5401     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5402     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5403   } else {
5404     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5405   }
5406   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5407   PetscFunctionReturn(0);
5408 }
5409 
5410 /*@C
5411     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5412 
5413     Collective on Mat
5414 
5415    Input Parameters:
5416 +    A,B - the matrices in mpiaij format
5417 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5418 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5419 
5420    Output Parameters:
5421 +    rowb, colb - index sets of rows and columns of B to extract
5422 -    B_seq - the sequential matrix generated
5423 
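   Example usage, a minimal sketch (assuming A and B are compatible assembled MPIAIJ matrices; the names rowb, colb, and Bseq are illustrative):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... B changes values (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
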
5424     Level: developer
5425 
5426 @*/
5427 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5428 {
5429   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5430   PetscErrorCode ierr;
5431   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5432   IS             isrowb,iscolb;
5433   Mat            *bseq=NULL;
5434 
5435   PetscFunctionBegin;
5436   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5437     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5438   }
5439   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5440 
5441   if (scall == MAT_INITIAL_MATRIX) {
5442     start = A->cmap->rstart;
5443     cmap  = a->garray;
5444     nzA   = a->A->cmap->n;
5445     nzB   = a->B->cmap->n;
5446     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5447     ncols = 0;
5448     for (i=0; i<nzB; i++) {  /* row < local row index */
5449       if (cmap[i] < start) idx[ncols++] = cmap[i];
5450       else break;
5451     }
5452     imark = i;
5453     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5454     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5455     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5456     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5457   } else {
5458     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5459     isrowb  = *rowb; iscolb = *colb;
5460     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5461     bseq[0] = *B_seq;
5462   }
5463   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5464   *B_seq = bseq[0];
5465   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5466   if (!rowb) {
5467     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5468   } else {
5469     *rowb = isrowb;
5470   }
5471   if (!colb) {
5472     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5473   } else {
5474     *colb = iscolb;
5475   }
5476   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5477   PetscFunctionReturn(0);
5478 }
5479 
5480 /*
5481     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5482     of the OFF-DIAGONAL portion of local A
5483 
5484     Collective on Mat
5485 
5486    Input Parameters:
5487 +    A,B - the matrices in mpiaij format
5488 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5489 
5490    Output Parameters:
5491 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5492 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5493 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5494 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5495 
5496     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5497      for this matrix. This is not desirable.
5498 
5499     Level: developer
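
    Example usage, a minimal sketch of the intended INITIAL/REUSE calling pattern (the caller keeps startsj_s, startsj_r, and bufa between calls and eventually frees them; the variable names here are illustrative):
.vb
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... B changes values (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
.ve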
5500 
5501 */
5502 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5503 {
5504   PetscErrorCode         ierr;
5505   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5506   Mat_SeqAIJ             *b_oth;
5507   VecScatter             ctx;
5508   MPI_Comm               comm;
5509   const PetscMPIInt      *rprocs,*sprocs;
5510   const PetscInt         *srow,*rstarts,*sstarts;
5511   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5512   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5513   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5514   MPI_Request            *rwaits = NULL,*swaits = NULL;
5515   MPI_Status             rstatus;
5516   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5517 
5518   PetscFunctionBegin;
5519   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5521 
5522   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5523     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5524   }
5525   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5526   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5527 
5528   if (size == 1) {
5529     if (startsj_s) *startsj_s = NULL; /* set the caller's pointers; assigning the local parameters would have no effect */
5530     if (bufa_ptr)  *bufa_ptr  = NULL;
5531     *B_oth    = NULL;
5532     PetscFunctionReturn(0);
5533   }
5534 
5535   ctx = a->Mvctx;
5536   tag = ((PetscObject)ctx)->tag;
5537 
5538   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5539   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5540   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5541   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5542   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5543   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5544   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5545 
5546   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5547   if (scall == MAT_INITIAL_MATRIX) {
5548     /* i-array */
5549     /*---------*/
5550     /*  post receives */
5551     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5552     for (i=0; i<nrecvs; i++) {
5553       rowlen = rvalues + rstarts[i]*rbs;
5554       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5555       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5556     }
5557 
5558     /* pack the outgoing message */
5559     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5560 
5561     sstartsj[0] = 0;
5562     rstartsj[0] = 0;
5563     len         = 0; /* total length of j or a array to be sent */
5564     if (nsends) {
5565       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5566       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5567     }
5568     for (i=0; i<nsends; i++) {
5569       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5570       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5571       for (j=0; j<nrows; j++) {
5572         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5573         for (l=0; l<sbs; l++) {
5574           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5575 
5576           rowlen[j*sbs+l] = ncols;
5577 
5578           len += ncols;
5579           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5580         }
5581         k++;
5582       }
5583       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5584 
5585       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5586     }
5587     /* recvs and sends of i-array are completed */
5588     i = nrecvs;
5589     while (i--) {
5590       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5591     }
5592     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5593     ierr = PetscFree(svalues);CHKERRQ(ierr);
5594 
5595     /* allocate buffers for sending j and a arrays */
5596     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5597     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5598 
5599     /* create i-array of B_oth */
5600     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5601 
5602     b_othi[0] = 0;
5603     len       = 0; /* total length of j or a array to be received */
5604     k         = 0;
5605     for (i=0; i<nrecvs; i++) {
5606       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5607       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5608       for (j=0; j<nrows; j++) {
5609         b_othi[k+1] = b_othi[k] + rowlen[j];
5610         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5611         k++;
5612       }
5613       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5614     }
5615     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5616 
5617     /* allocate space for the j and a arrays of B_oth */
5618     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5619     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5620 
5621     /* j-array */
5622     /*---------*/
5623     /*  post receives of j-array */
5624     for (i=0; i<nrecvs; i++) {
5625       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5626       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5627     }
5628 
5629     /* pack the outgoing message j-array */
5630     if (nsends) k = sstarts[0];
5631     for (i=0; i<nsends; i++) {
5632       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5633       bufJ  = bufj+sstartsj[i];
5634       for (j=0; j<nrows; j++) {
5635         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5636         for (ll=0; ll<sbs; ll++) {
5637           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5638           for (l=0; l<ncols; l++) {
5639             *bufJ++ = cols[l];
5640           }
5641           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5642         }
5643       }
5644       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5645     }
5646 
5647     /* recvs and sends of j-array are completed */
5648     i = nrecvs;
5649     while (i--) {
5650       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5651     }
5652     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5653   } else if (scall == MAT_REUSE_MATRIX) {
5654     sstartsj = *startsj_s;
5655     rstartsj = *startsj_r;
5656     bufa     = *bufa_ptr;
5657     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5658     b_otha   = b_oth->a;
5659   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5660 
5661   /* a-array */
5662   /*---------*/
5663   /*  post receives of a-array */
5664   for (i=0; i<nrecvs; i++) {
5665     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5666     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5667   }
5668 
5669   /* pack the outgoing message a-array */
5670   if (nsends) k = sstarts[0];
5671   for (i=0; i<nsends; i++) {
5672     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5673     bufA  = bufa+sstartsj[i];
5674     for (j=0; j<nrows; j++) {
5675       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5676       for (ll=0; ll<sbs; ll++) {
5677         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5678         for (l=0; l<ncols; l++) {
5679           *bufA++ = vals[l];
5680         }
5681         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5682       }
5683     }
5684     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5685   }
5686   /* recvs and sends of a-array are completed */
5687   i = nrecvs;
5688   while (i--) {
5689     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5690   }
5691   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5692   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5693 
5694   if (scall == MAT_INITIAL_MATRIX) {
5695     /* put together the new matrix */
5696     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5697 
5698     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5699     /* Since these are PETSc arrays, change flags to free them as necessary. */
5700     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5701     b_oth->free_a  = PETSC_TRUE;
5702     b_oth->free_ij = PETSC_TRUE;
5703     b_oth->nonew   = 0;
5704 
5705     ierr = PetscFree(bufj);CHKERRQ(ierr);
5706     if (!startsj_s || !bufa_ptr) {
5707       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5708       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5709     } else {
5710       *startsj_s = sstartsj;
5711       *startsj_r = rstartsj;
5712       *bufa_ptr  = bufa;
5713     }
5714   }
5715 
5716   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5717   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5718   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5719   PetscFunctionReturn(0);
5720 }
5721 
5722 /*@C
5723   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5724 
5725   Not Collective
5726 
5727   Input Parameter:
5728 . A - The matrix in mpiaij format
5729 
5730   Output Parameters:
5731 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5732 . colmap - A map from global column index to local index into lvec
5733 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5734 
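  Example usage, a minimal sketch (shown for a build without PETSC_USE_CTABLE, where colmap is a PetscInt array; with PETSC_USE_CTABLE the colmap argument is a PetscTable instead):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter scat;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
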
5735   Level: developer
5736 
5737 @*/
5738 #if defined(PETSC_USE_CTABLE)
5739 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5740 #else
5741 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5742 #endif
5743 {
5744   Mat_MPIAIJ *a;
5745 
5746   PetscFunctionBegin;
5747   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5748   PetscValidPointer(lvec, 2);
5749   PetscValidPointer(colmap, 3);
5750   PetscValidPointer(multScatter, 4);
5751   a = (Mat_MPIAIJ*) A->data;
5752   if (lvec) *lvec = a->lvec;
5753   if (colmap) *colmap = a->colmap;
5754   if (multScatter) *multScatter = a->Mvctx;
5755   PetscFunctionReturn(0);
5756 }
5757 
5758 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5759 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5760 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5761 #if defined(PETSC_HAVE_MKL_SPARSE)
5762 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5763 #endif
5764 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5765 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5766 #if defined(PETSC_HAVE_ELEMENTAL)
5767 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5768 #endif
5769 #if defined(PETSC_HAVE_HYPRE)
5770 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5771 #endif
5772 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5773 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5774 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5775 
5776 /*
5777     Computes (B'*A')' since computing A*B directly is untenable
5778 
5779                n                       p                          p
5780         (              )       (              )         (                  )
5781       m (      A       )  *  n (       B      )   =   m (         C        )
5782         (              )       (              )         (                  )
5783 
5784 */
5785 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5786 {
5787   PetscErrorCode ierr;
5788   Mat            At,Bt,Ct;
5789 
5790   PetscFunctionBegin;
5791   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5792   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5793   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5794   ierr = MatDestroy(&At);CHKERRQ(ierr);
5795   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5796   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5797   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5798   PetscFunctionReturn(0);
5799 }
5800 
5801 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5802 {
5803   PetscErrorCode ierr;
5804   PetscInt       m=A->rmap->n,n=B->cmap->n;
5805 
5806   PetscFunctionBegin;
5807   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5808   ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5809   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5810   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5811   ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
5812   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5813   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5814 
5815   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5816   PetscFunctionReturn(0);
5817 }
5818 
5819 /* ----------------------------------------------------------------*/
5820 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5821 {
5822   Mat_Product *product = C->product;
5823   Mat         A = product->A,B=product->B;
5824 
5825   PetscFunctionBegin;
5826   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5827     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5828 
5829   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5830   C->ops->productsymbolic = MatProductSymbolic_AB;
5831   PetscFunctionReturn(0);
5832 }
5833 
5834 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5835 {
5836   PetscErrorCode ierr;
5837   Mat_Product    *product = C->product;
5838 
5839   PetscFunctionBegin;
5840   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
5841   if (product->type == MATPRODUCT_AB) {
5842     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
5843   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
5844   PetscFunctionReturn(0);
5845 }
5846 /* ----------------------------------------------------------------*/
5847 
5848 /*MC
5849    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5850 
5851    Options Database Keys:
5852 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5853 
5854    Level: beginner
5855 
5856    Notes:
5857     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5858     in this case the values associated with the rows and columns one passes in are set to zero
5859     in the matrix.
5860 
5861     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5862     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
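
    Example, a minimal creation sketch (comm, A, m, n, M, N, d_nz, and o_nz are placeholder names and preallocation values):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve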
5863 
5864 .seealso: MatCreateAIJ()
5865 M*/
5866 
5867 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5868 {
5869   Mat_MPIAIJ     *b;
5870   PetscErrorCode ierr;
5871   PetscMPIInt    size;
5872 
5873   PetscFunctionBegin;
5874   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5875 
5876   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5877   B->data       = (void*)b;
5878   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5879   B->assembled  = PETSC_FALSE;
5880   B->insertmode = NOT_SET_VALUES;
5881   b->size       = size;
5882 
5883   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5884 
5885   /* build cache for off array entries formed */
5886   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5887 
5888   b->donotstash  = PETSC_FALSE;
5889   b->colmap      = 0;
5890   b->garray      = 0;
5891   b->roworiented = PETSC_TRUE;
5892 
5893   /* stuff used for matrix vector multiply */
5894   b->lvec  = NULL;
5895   b->Mvctx = NULL;
5896 
5897   /* stuff for MatGetRow() */
5898   b->rowindices   = 0;
5899   b->rowvalues    = 0;
5900   b->getrowactive = PETSC_FALSE;
5901 
5902   /* flexible pointer used in CUSP/CUSPARSE classes */
5903   b->spptr = NULL;
5904 
5905   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5906   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5907   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5908   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5909   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5910   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5911   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5912   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5913   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5914   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5915 #if defined(PETSC_HAVE_MKL_SPARSE)
5916   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5917 #endif
5918   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5919   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5920   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5921 #if defined(PETSC_HAVE_ELEMENTAL)
5922   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5923 #endif
5924   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5925   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5926   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5927   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5928 #if defined(PETSC_HAVE_HYPRE)
5929   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5930   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5931 #endif
5932   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
5933   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
5934   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5935   PetscFunctionReturn(0);
5936 }
5937 
5938 /*@C
5939      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5940          and "off-diagonal" part of the matrix in CSR format.
5941 
5942    Collective
5943 
5944    Input Parameters:
5945 +  comm - MPI communicator
5946 .  m - number of local rows (Cannot be PETSC_DECIDE)
5947 .  n - This value should be the same as the local size used in creating the
5948        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5949        it calculated if N is given). For square matrices n is almost always m.
5950 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5951 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5952 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5953 .   j - column indices
5954 .   a - matrix values
5955 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5956 .   oj - column indices
5957 -   oa - matrix values
5958 
5959    Output Parameter:
5960 .   mat - the matrix
5961 
5962    Level: advanced
5963 
5964    Notes:
5965        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5966        must free the arrays once the matrix has been destroyed and not before.
5967 
5968        The i and j indices are 0 based
5969 
5970        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5971 
5972        This sets local rows and cannot be used to set off-processor values.
5973 
5974        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5975        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5976        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5977        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5978        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5979        communication if it is known that only local entries will be set.
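
    Example, a minimal sketch for one process owning two rows, with an identity "diagonal" part and an empty "off-diagonal" part (the arrays must outlive the matrix):
.vb
     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,1.0};
     PetscInt    oi[] = {0,0,0},  oj[1];   /* no off-diagonal entries */
     PetscScalar oa[1];
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve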
5980 
5981 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5982           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5983 @*/
5984 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5985 {
5986   PetscErrorCode ierr;
5987   Mat_MPIAIJ     *maij;
5988 
5989   PetscFunctionBegin;
5990   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5991   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5992   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5993   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5994   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5995   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5996   maij = (Mat_MPIAIJ*) (*mat)->data;
5997 
5998   (*mat)->preallocated = PETSC_TRUE;
5999 
6000   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6001   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6002 
6003   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6004   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6005 
6006   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6007   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6008   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6009   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6010 
6011   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6012   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6013   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6014   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6015   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6016   PetscFunctionReturn(0);
6017 }
6018 
6019 /*
6020     Special version for direct calls from Fortran
6021 */
6022 #include <petsc/private/fortranimpl.h>
6023 
6024 /* Change these macros so they can be used in a void function */
6025 #undef CHKERRQ
6026 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6027 #undef SETERRQ2
6028 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6029 #undef SETERRQ3
6030 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6031 #undef SETERRQ
6032 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6033 
6034 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6035 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6036 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6037 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6038 #else
6039 #endif
6040 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6041 {
6042   Mat            mat  = *mmat;
6043   PetscInt       m    = *mm, n = *mn;
6044   InsertMode     addv = *maddv;
6045   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6046   PetscScalar    value;
6047   PetscErrorCode ierr;
6048 
6049   MatCheckPreallocated(mat,1);
6050   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6051   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6052   {
6053     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6054     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6055     PetscBool roworiented = aij->roworiented;
6056 
6057     /* Some Variables required in the macro */
6058     Mat        A                    = aij->A;
6059     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6060     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6061     MatScalar  *aa                  = a->a;
6062     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6063     Mat        B                    = aij->B;
6064     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6065     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6066     MatScalar  *ba                  = b->a;
6067     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6068      * cannot use "#if defined" inside a macro. */
6069     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6070 
6071     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6072     PetscInt  nonew = a->nonew;
6073     MatScalar *ap1,*ap2;
6074 
6075     PetscFunctionBegin;
6076     for (i=0; i<m; i++) {
6077       if (im[i] < 0) continue;
6078       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6079       if (im[i] >= rstart && im[i] < rend) {
6080         row      = im[i] - rstart;
6081         lastcol1 = -1;
6082         rp1      = aj + ai[row];
6083         ap1      = aa + ai[row];
6084         rmax1    = aimax[row];
6085         nrow1    = ailen[row];
6086         low1     = 0;
6087         high1    = nrow1;
6088         lastcol2 = -1;
6089         rp2      = bj + bi[row];
6090         ap2      = ba + bi[row];
6091         rmax2    = bimax[row];
6092         nrow2    = bilen[row];
6093         low2     = 0;
6094         high2    = nrow2;
6095 
6096         for (j=0; j<n; j++) {
6097           if (roworiented) value = v[i*n+j];
6098           else value = v[i+j*m];
6099           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6100           if (in[j] >= cstart && in[j] < cend) {
6101             col = in[j] - cstart;
6102             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6103 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6104             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6105 #endif
6106           } else if (in[j] < 0) continue;
6107           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6108             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6109             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6110           } else {
6111             if (mat->was_assembled) {
6112               if (!aij->colmap) {
6113                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6114               }
6115 #if defined(PETSC_USE_CTABLE)
6116               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6117               col--;
6118 #else
6119               col = aij->colmap[in[j]] - 1;
6120 #endif
6121               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6122                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6123                 col  =  in[j];
6124                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6125                 B        = aij->B;
6126                 b        = (Mat_SeqAIJ*)B->data;
6127                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6128                 rp2      = bj + bi[row];
6129                 ap2      = ba + bi[row];
6130                 rmax2    = bimax[row];
6131                 nrow2    = bilen[row];
6132                 low2     = 0;
6133                 high2    = nrow2;
6134                 bm       = aij->B->rmap->n;
6135                 ba       = b->a;
6136                 inserted = PETSC_FALSE;
6137               }
6138             } else col = in[j];
6139             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6140 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6141             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6142 #endif
6143           }
6144         }
6145       } else if (!aij->donotstash) {
6146         if (roworiented) {
6147           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6148         } else {
6149           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6150         }
6151       }
6152     }
6153   }
6154   PetscFunctionReturnVoid();
6155 }
6156