xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d545763cb672d537e48081282d5f2c0ca0cd428b)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
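/*
   Example usage: a minimal sketch (not taken verbatim from a PETSc example; n, d_nz and o_nz are
   placeholder values chosen by the caller) of creating an AIJ matrix and calling both
   preallocation routines, as recommended above:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);

   followed by MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() as usual; the preallocation
   call that does not match the communicator size is simply ignored.
*/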
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
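/*
   Example usage: a minimal sketch of selecting the AIJCRL format at runtime; the matrix is created
   and preallocated exactly as for MATAIJ above, but with

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and the program run with -mat_type aijcrl, or alternatively with an explicit
   MatSetType(A,MATAIJCRL) call.
*/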
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
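/* Return in keptrows the locally owned rows that contain at least one stored, numerically nonzero
   entry; keptrows is left NULL when no process owns an all-zero row */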
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
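/*
   A minimal sketch (names are placeholders) of calling this routine, where gseq is the sequential
   matrix whose entries (significant on rank 0) are to be spread out and mlocal is the number of
   rows this process is to own:

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX and the same dist moves only the numerical values from rank 0.
*/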
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each processor
425 stores an order-N integer array) but access is fast.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
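/*
   A sketch of how the colmap built above is consulted elsewhere in this file (see, e.g.,
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()): given a global column gcol, the local column
   index lcol in the off-diagonal block B is recovered with

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   where lcol < 0 means that gcol does not occur in the off-diagonal part on this process.
*/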
447 
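/*
  The two macros below insert (or, for ADD_VALUES, accumulate) a single value at local (row,col) of
  the diagonal block A and the off-diagonal block B respectively.  They locate the column by a
  truncated binary search plus a short linear scan over the sorted column indices of the row, update
  an existing entry if found, and otherwise shift the remainder of the row and, if necessary,
  reallocate via MatSeqXAIJReallocateAIJ(), honoring the nonew and ignorezeroentries flags.
*/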
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Unclear whether this PetscLogFlops() call slows down the code */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
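/* The array v below is expected to contain the complete locally owned row in ascending global column
   order: the off-diagonal entries left of the diagonal block, then the diagonal-block entries, then
   the off-diagonal entries right of the diagonal block, matching the three copies performed here */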
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
584 #endif
585     if (im[i] >= rstart && im[i] < rend) {
586       row      = im[i] - rstart;
587       lastcol1 = -1;
588       rp1      = aj + ai[row];
589       ap1      = aa + ai[row];
590       rmax1    = aimax[row];
591       nrow1    = ailen[row];
592       low1     = 0;
593       high1    = nrow1;
594       lastcol2 = -1;
595       rp2      = bj + bi[row];
596       ap2      = ba + bi[row];
597       rmax2    = bimax[row];
598       nrow2    = bilen[row];
599       low2     = 0;
600       high2    = nrow2;
601 
602       for (j=0; j<n; j++) {
603         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
604         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
605         if (in[j] >= cstart && in[j] < cend) {
606           col   = in[j] - cstart;
607           nonew = a->nonew;
608           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
610           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
611 #endif
612         } else if (in[j] < 0) continue;
613 #if defined(PETSC_USE_DEBUG)
614         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
615 #endif
616         else {
617           if (mat->was_assembled) {
618             if (!aij->colmap) {
619               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
620             }
621 #if defined(PETSC_USE_CTABLE)
622             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
623             col--;
624 #else
625             col = aij->colmap[in[j]] - 1;
626 #endif
627             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
628               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
629               col  =  in[j];
630               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
631               B        = aij->B;
632               b        = (Mat_SeqAIJ*)B->data;
633               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
634               rp2      = bj + bi[row];
635               ap2      = ba + bi[row];
636               rmax2    = bimax[row];
637               nrow2    = bilen[row];
638               low2     = 0;
639               high2    = nrow2;
640               bm       = aij->B->rmap->n;
641               ba       = b->a;
642               inserted = PETSC_FALSE;
643             } else if (col < 0) {
644               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
645                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
646               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
647             }
648           } else col = in[j];
649           nonew = b->nonew;
650           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
652           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
653 #endif
654         }
655       }
656     } else {
657       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
658       if (!aij->donotstash) {
659         mat->assembled = PETSC_FALSE;
660         if (roworiented) {
661           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
662         } else {
663           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
664         }
665       }
666     }
667   }
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
673     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
674     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. (A usage sketch follows MatSetValues_MPIAIJ_CopyFromCSRFormat() below.)
675 */
676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
677 {
678   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
679   Mat            A           = aij->A; /* diagonal part of the matrix */
680   Mat            B           = aij->B; /* offdiagonal part of the matrix */
681   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
682   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
683   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
684   PetscInt       *ailen      = a->ilen,*aj = a->j;
685   PetscInt       *bilen      = b->ilen,*bj = b->j;
686   PetscInt       am          = aij->A->rmap->n,j;
687   PetscInt       diag_so_far = 0,dnz;
688   PetscInt       offd_so_far = 0,onz;
689 
690   PetscFunctionBegin;
691   /* Iterate over all rows of the matrix */
692   for (j=0; j<am; j++) {
693     dnz = onz = 0;
694     /*  Iterate over all non-zero columns of the current row */
695     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
696       /* If column is in the diagonal */
697       if (mat_j[col] >= cstart && mat_j[col] < cend) {
698         aj[diag_so_far++] = mat_j[col] - cstart;
699         dnz++;
700       } else { /* off-diagonal entries */
701         bj[offd_so_far++] = mat_j[col];
702         onz++;
703       }
704     }
705     ailen[j] = dnz;
706     bilen[j] = onz;
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 /*
712     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
713     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
714     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
715     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
716     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
717 */
718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
719 {
720   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
721   Mat            A      = aij->A; /* diagonal part of the matrix */
722   Mat            B      = aij->B; /* offdiagonal part of the matrix */
723   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
724   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
725   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
726   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
727   PetscInt       *ailen = a->ilen,*aj = a->j;
728   PetscInt       *bilen = b->ilen,*bj = b->j;
729   PetscInt       am     = aij->A->rmap->n,j;
730   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
731   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
732   PetscScalar    *aa = a->a,*ba = b->a;
733 
734   PetscFunctionBegin;
735   /* Iterate over all rows of the matrix */
736   for (j=0; j<am; j++) {
737     dnz_row = onz_row = 0;
738     rowstart_offd = full_offd_i[j];
739     rowstart_diag = full_diag_i[j];
740     /*  Iterate over all non-zero columns of the current row */
741     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
742       /* If column is in the diagonal */
743       if (mat_j[col] >= cstart && mat_j[col] < cend) {
744         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
745         aa[rowstart_diag+dnz_row] = mat_a[col];
746         dnz_row++;
747       } else { /* off-diagonal entries */
748         bj[rowstart_offd+onz_row] = mat_j[col];
749         ba[rowstart_offd+onz_row] = mat_a[col];
750         onz_row++;
751       }
752     }
753     ailen[j] = dnz_row;
754     bilen[j] = onz_row;
755   }
756   PetscFunctionReturn(0);
757 }
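/*
   A minimal sketch (assuming csr_i, csr_j and csr_a hold only the locally owned rows, with the
   column indices of each row sorted, and that mat was preallocated to match and has never been
   assembled) of filling a matrix with the helper above:

     ierr = MatSetValues_MPIAIJ_CopyFromCSRFormat(mat,csr_j,csr_i,csr_a);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   The symbolic variant above is used instead when only the nonzero pattern is being copied; any
   off-process contributions are added afterwards with ordinary MatSetValues() calls, as noted in
   the comments above.
*/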
758 
759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
760 {
761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
762   PetscErrorCode ierr;
763   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
764   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
765 
766   PetscFunctionBegin;
767   for (i=0; i<m; i++) {
768     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
769     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
770     if (idxm[i] >= rstart && idxm[i] < rend) {
771       row = idxm[i] - rstart;
772       for (j=0; j<n; j++) {
773         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
774         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
775         if (idxn[j] >= cstart && idxn[j] < cend) {
776           col  = idxn[j] - cstart;
777           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
778         } else {
779           if (!aij->colmap) {
780             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
781           }
782 #if defined(PETSC_USE_CTABLE)
783           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
784           col--;
785 #else
786           col = aij->colmap[idxn[j]] - 1;
787 #endif
788           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
789           else {
790             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
800 
801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   PetscErrorCode ierr;
805   PetscInt       nstash,reallocs;
806 
807   PetscFunctionBegin;
808   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
812   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
819   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
820   PetscErrorCode ierr;
821   PetscMPIInt    n;
822   PetscInt       i,j,rstart,ncols,flg;
823   PetscInt       *row,*col;
824   PetscBool      other_disassembled;
825   PetscScalar    *val;
826 
827   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
828 
829   PetscFunctionBegin;
830   if (!aij->donotstash && !mat->nooffprocentries) {
831     while (1) {
832       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
833       if (!flg) break;
834 
835       for (i=0; i<n; ) {
836         /* Now identify the consecutive vals belonging to the same row */
837         for (j=i,rstart=row[j]; j<n; j++) {
838           if (row[j] != rstart) break;
839         }
840         if (j < n) ncols = j-i;
841         else       ncols = n-i;
842         /* Now assemble all these values with a single function call */
843         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
844 
845         i = j;
846       }
847     }
848     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
849   }
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled; if so we must
857      also disassemble ourselves, in order that we may reassemble. */
858   /*
859      if the nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled, and thus we can skip this step
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = 0;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layouts don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113 
1114   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   VecScatter     Mvctx = a->Mvctx;
1136 
1137   PetscFunctionBegin;
1138   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1139   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1156   /* add partial results together */
1157   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1163 {
1164   MPI_Comm       comm;
1165   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1166   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1167   IS             Me,Notme;
1168   PetscErrorCode ierr;
1169   PetscInt       M,N,first,last,*notme,i;
1170   PetscBool      lf;
1171   PetscMPIInt    size;
1172 
1173   PetscFunctionBegin;
1174   /* Easy test: symmetric diagonal block */
1175   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1176   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1177   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1178   if (!*f) PetscFunctionReturn(0);
1179   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1181   if (size == 1) PetscFunctionReturn(0);
1182 
1183   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1184   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1185   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1186   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1187   for (i=0; i<first; i++) notme[i] = i;
1188   for (i=last; i<M; i++) notme[i-last+first] = i;
1189   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1190   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1191   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1192   Aoff = Aoffs[0];
1193   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1194   Boff = Boffs[0];
1195   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1197   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1199   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1200   ierr = PetscFree(notme);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1205 {
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1214 {
1215   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219   /* do nondiagonal part */
1220   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1221   /* do local part */
1222   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223   /* add partial results together */
1224   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*
1230   This only works correctly for square matrices where the subblock A->A is the
1231    diagonal block
1232 */
1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1234 {
1235   PetscErrorCode ierr;
1236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1246 {
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1252   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1257 {
1258   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262 #if defined(PETSC_USE_LOG)
1263   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1264 #endif
1265   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1266   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1268   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1269 #if defined(PETSC_USE_CTABLE)
1270   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1271 #else
1272   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1273 #endif
1274   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1275   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1276   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1277   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1278   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1279   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1280   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1281 
1282   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1292 #if defined(PETSC_HAVE_ELEMENTAL)
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1294 #endif
1295 #if defined(PETSC_HAVE_HYPRE)
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1305 {
1306   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1307   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1308   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1309   const PetscInt    *garray = aij->garray;
1310   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1311   PetscInt          *rowlens;
1312   PetscInt          *colidxs;
1313   PetscScalar       *matvals;
1314   PetscErrorCode    ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1318 
1319   M  = mat->rmap->N;
1320   N  = mat->cmap->N;
1321   m  = mat->rmap->n;
1322   rs = mat->rmap->rstart;
1323   cs = mat->cmap->rstart;
1324   nz = A->nz + B->nz;
1325 
1326   /* write matrix header */
1327   header[0] = MAT_FILE_CLASSID;
1328   header[1] = M; header[2] = N; header[3] = nz;
1329   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1330   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1331 
1332   /* fill in and store row lengths  */
1333   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1334   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1335   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1336   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1337 
1338   /* fill in and store column indices */
1339   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1340   for (cnt=0, i=0; i<m; i++) {
1341     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1342       if (garray[B->j[jb]] > cs) break;
1343       colidxs[cnt++] = garray[B->j[jb]];
1344     }
1345     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1346       colidxs[cnt++] = A->j[ja] + cs;
1347     for (; jb<B->i[i+1]; jb++)
1348       colidxs[cnt++] = garray[B->j[jb]];
1349   }
1350   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1351   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1352   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1353 
1354   /* fill in and store nonzero values */
1355   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1356   for (cnt=0, i=0; i<m; i++) {
1357     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1358       if (garray[B->j[jb]] > cs) break;
1359       matvals[cnt++] = B->a[jb];
1360     }
1361     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1362       matvals[cnt++] = A->a[ja];
1363     for (; jb<B->i[i+1]; jb++)
1364       matvals[cnt++] = B->a[jb];
1365   }
1366   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1367   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1368   ierr = PetscFree(matvals);CHKERRQ(ierr);
1369 
1370   /* write block size option to the viewer's .info file */
1371   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
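/*
   Note (added): the binary stream written above is, in order, a 4-entry header
   (MAT_FILE_CLASSID, M, N, global nz), the per-row nonzero counts, all global column indices,
   and finally all nonzero values; within each row the off-diagonal columns left of the diagonal
   block come first, then the diagonal block, then the remaining off-diagonal columns, so the
   columns of every row come out in increasing global order.  A minimal sketch of reaching this
   path (not part of the original source; the file name is illustrative):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/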
1374 
1375 #include <petscdraw.h>
1376 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1377 {
1378   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1379   PetscErrorCode    ierr;
1380   PetscMPIInt       rank = aij->rank,size = aij->size;
1381   PetscBool         isdraw,iascii,isbinary;
1382   PetscViewer       sviewer;
1383   PetscViewerFormat format;
1384 
1385   PetscFunctionBegin;
1386   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1387   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1389   if (iascii) {
1390     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1391     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1392       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1393       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1394       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1395       for (i=0; i<(PetscInt)size; i++) {
1396         nmax = PetscMax(nmax,nz[i]);
1397         nmin = PetscMin(nmin,nz[i]);
1398         navg += nz[i];
1399       }
1400       ierr = PetscFree(nz);CHKERRQ(ierr);
1401       navg = navg/size;
1402       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1403       PetscFunctionReturn(0);
1404     }
1405     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1406     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1407       MatInfo   info;
1408       PetscBool inodes;
1409 
1410       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1411       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1412       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1413       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1414       if (!inodes) {
1415         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1416                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1417       } else {
1418         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1419                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1420       }
1421       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1423       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1424       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1425       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1426       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1427       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1428       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1429       PetscFunctionReturn(0);
1430     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1431       PetscInt inodecount,inodelimit,*inodes;
1432       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1433       if (inodes) {
1434         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1435       } else {
1436         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1437       }
1438       PetscFunctionReturn(0);
1439     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1440       PetscFunctionReturn(0);
1441     }
1442   } else if (isbinary) {
1443     if (size == 1) {
1444       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1445       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1446     } else {
1447       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1448     }
1449     PetscFunctionReturn(0);
1450   } else if (iascii && size == 1) {
1451     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1452     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1453     PetscFunctionReturn(0);
1454   } else if (isdraw) {
1455     PetscDraw draw;
1456     PetscBool isnull;
1457     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1458     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1459     if (isnull) PetscFunctionReturn(0);
1460   }
1461 
1462   { /* assemble the entire matrix onto first processor */
1463     Mat A = NULL, Av;
1464     IS  isrow,iscol;
1465 
1466     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1467     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1468     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1469     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1470 /*  The commented code uses MatCreateSubMatrices instead */
1471 /*
1472     Mat *AA, A = NULL, Av;
1473     IS  isrow,iscol;
1474 
1475     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1476     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1477     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1478     if (!rank) {
1479        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1480        A    = AA[0];
1481        Av   = AA[0];
1482     }
1483     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1484 */
1485     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1486     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1487     /*
1488        Every process has to participate in this call, since the graphics waits are
1489        synchronized across all processes that share the PetscDraw object
1490     */
1491     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1492     if (!rank) {
1493       if (((PetscObject)mat)->name) {
1494         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1495       }
1496       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1497     }
1498     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1499     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1500     ierr = MatDestroy(&A);CHKERRQ(ierr);
1501   }
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1506 {
1507   PetscErrorCode ierr;
1508   PetscBool      iascii,isdraw,issocket,isbinary;
1509 
1510   PetscFunctionBegin;
1511   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1512   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1513   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1514   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1515   if (iascii || isdraw || isbinary || issocket) {
1516     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1522 {
1523   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1524   PetscErrorCode ierr;
1525   Vec            bb1 = 0;
1526   PetscBool      hasop;
1527 
1528   PetscFunctionBegin;
1529   if (flag == SOR_APPLY_UPPER) {
1530     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531     PetscFunctionReturn(0);
1532   }
1533 
1534   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1535     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1536   }
1537 
1538   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1539     if (flag & SOR_ZERO_INITIAL_GUESS) {
1540       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1541       its--;
1542     }
1543 
1544     while (its--) {
1545       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547 
1548       /* update rhs: bb1 = bb - B*x */
1549       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1550       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1551 
1552       /* local sweep */
1553       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1554     }
1555   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1556     if (flag & SOR_ZERO_INITIAL_GUESS) {
1557       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1558       its--;
1559     }
1560     while (its--) {
1561       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1562       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563 
1564       /* update rhs: bb1 = bb - B*x */
1565       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1566       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1567 
1568       /* local sweep */
1569       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1570     }
1571   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1572     if (flag & SOR_ZERO_INITIAL_GUESS) {
1573       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1574       its--;
1575     }
1576     while (its--) {
1577       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579 
1580       /* update rhs: bb1 = bb - B*x */
1581       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1582       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1583 
1584       /* local sweep */
1585       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1586     }
1587   } else if (flag & SOR_EISENSTAT) {
1588     Vec xx1;
1589 
1590     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1591     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1592 
1593     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1594     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1595     if (!mat->diag) {
1596       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1597       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1598     }
1599     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1600     if (hasop) {
1601       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1602     } else {
1603       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1604     }
1605     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1606 
1607     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1608 
1609     /* local sweep */
1610     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1611     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1612     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1613   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1614 
1615   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1616 
1617   matin->factorerrortype = mat->A->factorerrortype;
1618   PetscFunctionReturn(0);
1619 }
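/*
   Note (added): in the local sweeps above each iteration first gathers the off-process entries of
   xx into mat->lvec and then relaxes only with the diagonal block, using the modified right-hand
   side

       bb1 = bb - B*lvec        (B is the off-diagonal block),

   so the method acts as block Jacobi across processes with (S)SOR within each process; a truly
   coupled parallel SOR is not supported, as the final SETERRQ indicates.  PCSOR typically reaches
   this routine through MatSOR() on MPIAIJ matrices (stated here as an aid to the reader, not as
   part of the original source).
*/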
1620 
1621 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1622 {
1623   Mat            aA,aB,Aperm;
1624   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1625   PetscScalar    *aa,*ba;
1626   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1627   PetscSF        rowsf,sf;
1628   IS             parcolp = NULL;
1629   PetscBool      done;
1630   PetscErrorCode ierr;
1631 
1632   PetscFunctionBegin;
1633   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1634   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1635   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1636   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1637 
1638   /* Invert row permutation to find out where my rows should go */
1639   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1640   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1641   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1642   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1643   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1644   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1645 
1646   /* Invert column permutation to find out where my columns should go */
1647   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1648   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1649   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1650   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1651   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1652   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1653   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1654 
1655   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1656   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1657   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1658 
1659   /* Find out where my gcols should go */
1660   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1661   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1662   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1663   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1664   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1665   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1666   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1667   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1668 
1669   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1670   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1671   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1672   for (i=0; i<m; i++) {
1673     PetscInt    row = rdest[i];
1674     PetscMPIInt rowner;
1675     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1676     for (j=ai[i]; j<ai[i+1]; j++) {
1677       PetscInt    col = cdest[aj[j]];
1678       PetscMPIInt cowner;
1679       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1680       if (rowner == cowner) dnnz[i]++;
1681       else onnz[i]++;
1682     }
1683     for (j=bi[i]; j<bi[i+1]; j++) {
1684       PetscInt    col = gcdest[bj[j]];
1685       PetscMPIInt cowner;
1686       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1687       if (rowner == cowner) dnnz[i]++;
1688       else onnz[i]++;
1689     }
1690   }
1691   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1692   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1693   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1694   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1695   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1696 
1697   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1698   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1699   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1700   for (i=0; i<m; i++) {
1701     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1702     PetscInt j0,rowlen;
1703     rowlen = ai[i+1] - ai[i];
1704     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m, so insert the values in batches of at most m */
1705       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1706       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1707     }
1708     rowlen = bi[i+1] - bi[i];
1709     for (j0=j=0; j<rowlen; j0=j) {
1710       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1711       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1712     }
1713   }
1714   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1715   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1716   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1717   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1718   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1719   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1720   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1721   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1722   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1723   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1724   *B = Aperm;
1725   PetscFunctionReturn(0);
1726 }
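/*
   Note (added): the routine above uses PetscSF reductions to invert the row and column
   permutations (so each process learns where its rows, columns, and ghost columns land), builds an
   exact preallocation for the permuted matrix, and fills it with MatSetValues().  A minimal usage
   sketch with identity permutations (not part of the original source; the names are illustrative):

     IS       rowp,colp;
     Mat      B;
     PetscInt m,n,rstart,cstart;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/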
1727 
1728 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1729 {
1730   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1735   if (ghosts) *ghosts = aij->garray;
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1740 {
1741   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1742   Mat            A    = mat->A,B = mat->B;
1743   PetscErrorCode ierr;
1744   PetscLogDouble isend[5],irecv[5];
1745 
1746   PetscFunctionBegin;
1747   info->block_size = 1.0;
1748   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1749 
1750   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1751   isend[3] = info->memory;  isend[4] = info->mallocs;
1752 
1753   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1754 
1755   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1756   isend[3] += info->memory;  isend[4] += info->mallocs;
1757   if (flag == MAT_LOCAL) {
1758     info->nz_used      = isend[0];
1759     info->nz_allocated = isend[1];
1760     info->nz_unneeded  = isend[2];
1761     info->memory       = isend[3];
1762     info->mallocs      = isend[4];
1763   } else if (flag == MAT_GLOBAL_MAX) {
1764     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1765 
1766     info->nz_used      = irecv[0];
1767     info->nz_allocated = irecv[1];
1768     info->nz_unneeded  = irecv[2];
1769     info->memory       = irecv[3];
1770     info->mallocs      = irecv[4];
1771   } else if (flag == MAT_GLOBAL_SUM) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   }
1780   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1781   info->fill_ratio_needed = 0;
1782   info->factor_mallocs    = 0;
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1787 {
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   switch (op) {
1793   case MAT_NEW_NONZERO_LOCATIONS:
1794   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1795   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1796   case MAT_KEEP_NONZERO_PATTERN:
1797   case MAT_NEW_NONZERO_LOCATION_ERR:
1798   case MAT_USE_INODES:
1799   case MAT_IGNORE_ZERO_ENTRIES:
1800     MatCheckPreallocated(A,1);
1801     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1802     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1803     break;
1804   case MAT_ROW_ORIENTED:
1805     MatCheckPreallocated(A,1);
1806     a->roworiented = flg;
1807 
1808     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1809     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1810     break;
1811   case MAT_NEW_DIAGONALS:
1812   case MAT_SORTED_FULL:
1813     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1814     break;
1815   case MAT_IGNORE_OFF_PROC_ENTRIES:
1816     a->donotstash = flg;
1817     break;
1818   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1819   case MAT_SPD:
1820   case MAT_SYMMETRIC:
1821   case MAT_STRUCTURALLY_SYMMETRIC:
1822   case MAT_HERMITIAN:
1823   case MAT_SYMMETRY_ETERNAL:
1824     break;
1825   case MAT_SUBMAT_SINGLEIS:
1826     A->submat_singleis = flg;
1827     break;
1828   case MAT_STRUCTURE_ONLY:
1829     /* The option is handled directly by MatSetOption() */
1830     break;
1831   default:
1832     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1833   }
1834   PetscFunctionReturn(0);
1835 }
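/*
   Note (added): most options above are simply forwarded to both local blocks; the main
   MPI-specific one is MAT_IGNORE_OFF_PROC_ENTRIES, which disables stashing and communication of
   values set for rows owned by other processes.  A minimal sketch (not part of the original
   source):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
*/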
1836 
1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1838 {
1839   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1840   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1841   PetscErrorCode ierr;
1842   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1843   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1844   PetscInt       *cmap,*idx_p;
1845 
1846   PetscFunctionBegin;
1847   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1848   mat->getrowactive = PETSC_TRUE;
1849 
1850   if (!mat->rowvalues && (idx || v)) {
1851     /*
1852         allocate enough space to hold information from the longest row.
1853     */
1854     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1855     PetscInt   max = 1,tmp;
1856     for (i=0; i<matin->rmap->n; i++) {
1857       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1858       if (max < tmp) max = tmp;
1859     }
1860     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1861   }
1862 
1863   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1864   lrow = row - rstart;
1865 
1866   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1867   if (!v)   {pvA = 0; pvB = 0;}
1868   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1869   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   nztot = nzA + nzB;
1872 
1873   cmap = mat->garray;
1874   if (v  || idx) {
1875     if (nztot) {
1876       /* Sort by increasing column numbers, assuming A and B already sorted */
1877       PetscInt imark = -1;
1878       if (v) {
1879         *v = v_p = mat->rowvalues;
1880         for (i=0; i<nzB; i++) {
1881           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1882           else break;
1883         }
1884         imark = i;
1885         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1886         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1887       }
1888       if (idx) {
1889         *idx = idx_p = mat->rowindices;
1890         if (imark > -1) {
1891           for (i=0; i<imark; i++) {
1892             idx_p[i] = cmap[cworkB[i]];
1893           }
1894         } else {
1895           for (i=0; i<nzB; i++) {
1896             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1897             else break;
1898           }
1899           imark = i;
1900         }
1901         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1902         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1903       }
1904     } else {
1905       if (idx) *idx = 0;
1906       if (v)   *v   = 0;
1907     }
1908   }
1909   *nz  = nztot;
1910   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1911   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1916 {
1917   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1918 
1919   PetscFunctionBegin;
1920   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1921   aij->getrowactive = PETSC_FALSE;
1922   PetscFunctionReturn(0);
1923 }
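/*
   Note (added): MatGetRow_MPIAIJ() above only serves rows owned by the calling process and merges
   the diagonal and off-diagonal blocks into one list with global column indices, so the usual
   pattern is to loop over the local ownership range.  A minimal sketch (not part of the original
   source):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...                                                  use cols[] (global indices) and vals[] here
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/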
1924 
1925 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1926 {
1927   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1928   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1929   PetscErrorCode ierr;
1930   PetscInt       i,j,cstart = mat->cmap->rstart;
1931   PetscReal      sum = 0.0;
1932   MatScalar      *v;
1933 
1934   PetscFunctionBegin;
1935   if (aij->size == 1) {
1936     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1937   } else {
1938     if (type == NORM_FROBENIUS) {
1939       v = amat->a;
1940       for (i=0; i<amat->nz; i++) {
1941         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1942       }
1943       v = bmat->a;
1944       for (i=0; i<bmat->nz; i++) {
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946       }
1947       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1948       *norm = PetscSqrtReal(*norm);
1949       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1950     } else if (type == NORM_1) { /* max column norm */
1951       PetscReal *tmp,*tmp2;
1952       PetscInt  *jj,*garray = aij->garray;
1953       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1954       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1955       *norm = 0.0;
1956       v     = amat->a; jj = amat->j;
1957       for (j=0; j<amat->nz; j++) {
1958         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1959       }
1960       v = bmat->a; jj = bmat->j;
1961       for (j=0; j<bmat->nz; j++) {
1962         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1963       }
1964       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1965       for (j=0; j<mat->cmap->N; j++) {
1966         if (tmp2[j] > *norm) *norm = tmp2[j];
1967       }
1968       ierr = PetscFree(tmp);CHKERRQ(ierr);
1969       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1970       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1971     } else if (type == NORM_INFINITY) { /* max row norm */
1972       PetscReal ntemp = 0.0;
1973       for (j=0; j<aij->A->rmap->n; j++) {
1974         v   = amat->a + amat->i[j];
1975         sum = 0.0;
1976         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1977           sum += PetscAbsScalar(*v); v++;
1978         }
1979         v = bmat->a + bmat->i[j];
1980         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1981           sum += PetscAbsScalar(*v); v++;
1982         }
1983         if (sum > ntemp) ntemp = sum;
1984       }
1985       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1986       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1987     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1988   }
1989   PetscFunctionReturn(0);
1990 }
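/*
   Note (added): the norms above are assembled from the two local blocks followed by a single
   reduction: NORM_FROBENIUS sums |a_ij|^2 locally and takes the square root of the MPI sum,
   NORM_1 sums per-column absolute values, reduces them with MPI_SUM, and takes the maximum,
   and NORM_INFINITY reduces the per-row absolute sums with MPI_MAX.  A minimal sketch (not part
   of the original source):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/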
1991 
1992 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1993 {
1994   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1995   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1996   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1997   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1998   PetscErrorCode  ierr;
1999   Mat             B,A_diag,*B_diag;
2000   const MatScalar *array;
2001 
2002   PetscFunctionBegin;
2003   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2004   ai = Aloc->i; aj = Aloc->j;
2005   bi = Bloc->i; bj = Bloc->j;
2006   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2007     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2008     PetscSFNode          *oloc;
2009     PETSC_UNUSED PetscSF sf;
2010 
2011     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2012     /* compute d_nnz for preallocation */
2013     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2014     for (i=0; i<ai[ma]; i++) {
2015       d_nnz[aj[i]]++;
2016     }
2017     /* compute local off-diagonal contributions */
2018     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2019     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2020     /* map those to global */
2021     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2022     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2023     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2024     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2025     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2026     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2027     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2028 
2029     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2030     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2031     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2032     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2033     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2034     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2035   } else {
2036     B    = *matout;
2037     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2038   }
2039 
2040   b           = (Mat_MPIAIJ*)B->data;
2041   A_diag      = a->A;
2042   B_diag      = &b->A;
2043   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2044   A_diag_ncol = A_diag->cmap->N;
2045   B_diag_ilen = sub_B_diag->ilen;
2046   B_diag_i    = sub_B_diag->i;
2047 
2048   /* Set ilen for diagonal of B */
2049   for (i=0; i<A_diag_ncol; i++) {
2050     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2051   }
2052 
2053   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2054   very quickly (i.e. without using MatSetValues()), because all writes are local. */
2055   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2056 
2057   /* copy over the B part */
2058   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2059   array = Bloc->a;
2060   row   = A->rmap->rstart;
2061   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2062   cols_tmp = cols;
2063   for (i=0; i<mb; i++) {
2064     ncol = bi[i+1]-bi[i];
2065     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2066     row++;
2067     array += ncol; cols_tmp += ncol;
2068   }
2069   ierr = PetscFree(cols);CHKERRQ(ierr);
2070 
2071   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2074     *matout = B;
2075   } else {
2076     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2077   }
2078   PetscFunctionReturn(0);
2079 }
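/*
   Note (added): the transpose above preallocates the result by counting per-column nonzeros of
   the diagonal block and reducing the off-diagonal counts onto their owners with a PetscSF,
   transposes the diagonal block directly (without MatSetValues()), and inserts the off-diagonal
   block row by row.  A minimal sketch (not part of the original source):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...                                               later, if the nonzero pattern is unchanged
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
*/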
2080 
2081 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2082 {
2083   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2084   Mat            a    = aij->A,b = aij->B;
2085   PetscErrorCode ierr;
2086   PetscInt       s1,s2,s3;
2087 
2088   PetscFunctionBegin;
2089   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2090   if (rr) {
2091     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2092     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2093     /* Overlap communication with computation. */
2094     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2095   }
2096   if (ll) {
2097     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2098     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2099     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2100   }
2101   /* scale  the diagonal block */
2102   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2103 
2104   if (rr) {
2105     /* Do a scatter end and then right scale the off-diagonal block */
2106     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2107     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2113 {
2114   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2115   PetscErrorCode ierr;
2116 
2117   PetscFunctionBegin;
2118   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2123 {
2124   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2125   Mat            a,b,c,d;
2126   PetscBool      flg;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   a = matA->A; b = matA->B;
2131   c = matB->A; d = matB->B;
2132 
2133   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2134   if (flg) {
2135     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2136   }
2137   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2142 {
2143   PetscErrorCode ierr;
2144   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2145   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2146 
2147   PetscFunctionBegin;
2148   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2149   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2150     /* Because of the column compression in the off-process part of the matrix a->B,
2151        the number of columns in a->B and b->B may differ, so we cannot call
2152        MatCopy() directly on the two parts. If need be, a copy more efficient than
2153        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2154        and then copying the submatrices. */
2155     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2156   } else {
2157     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2158     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2159   }
2160   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2165 {
2166   PetscErrorCode ierr;
2167 
2168   PetscFunctionBegin;
2169   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 /*
2174    Computes the number of nonzeros per row needed for preallocation when X and Y
2175    have different nonzero structure.
2176 */
2177 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2178 {
2179   PetscInt       i,j,k,nzx,nzy;
2180 
2181   PetscFunctionBegin;
2182   /* Set the number of nonzeros in the new matrix */
2183   for (i=0; i<m; i++) {
2184     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2185     nzx = xi[i+1] - xi[i];
2186     nzy = yi[i+1] - yi[i];
2187     nnz[i] = 0;
2188     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2189       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2190       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2191       nnz[i]++;
2192     }
2193     for (; k<nzy; k++) nnz[i]++;
2194   }
2195   PetscFunctionReturn(0);
2196 }
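/*
   Note (added): the loop above merges, row by row, the two sorted global column lists and counts
   the size of their union.  For example, if a row of X has global columns {1,5,7} and the same
   row of Y has {1,3,7}, the merged row needs nnz = 4 (columns {1,3,5,7}).
*/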
2197 
2198 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2199 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2200 {
2201   PetscErrorCode ierr;
2202   PetscInt       m = Y->rmap->N;
2203   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2204   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2205 
2206   PetscFunctionBegin;
2207   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2212 {
2213   PetscErrorCode ierr;
2214   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2215   PetscBLASInt   bnz,one=1;
2216   Mat_SeqAIJ     *x,*y;
2217 
2218   PetscFunctionBegin;
2219   if (str == SAME_NONZERO_PATTERN) {
2220     PetscScalar alpha = a;
2221     x    = (Mat_SeqAIJ*)xx->A->data;
2222     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2223     y    = (Mat_SeqAIJ*)yy->A->data;
2224     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2225     x    = (Mat_SeqAIJ*)xx->B->data;
2226     y    = (Mat_SeqAIJ*)yy->B->data;
2227     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2228     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2229     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2230     /* the MatAXPY_Basic* subroutines call MatAssemblyBegin()/MatAssemblyEnd(), so the matrix on the GPU
2231        will be updated */
2232 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2233     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2234       Y->offloadmask = PETSC_OFFLOAD_CPU;
2235     }
2236 #endif
2237   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2238     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2239   } else {
2240     Mat      B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2248     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2251     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2252     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2253     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
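/*
   Note (added): with SAME_NONZERO_PATTERN the routine above reduces to two BLAS axpy calls on the
   stored values of the diagonal and off-diagonal blocks; otherwise it falls back to a
   MatSetValues()-based addition with recomputed preallocation.  A minimal sketch (not part of the
   original source):

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);     Y <- Y + 2*X
*/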
2259 
2260 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2261 
2262 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2263 {
2264 #if defined(PETSC_USE_COMPLEX)
2265   PetscErrorCode ierr;
2266   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2270   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2271 #else
2272   PetscFunctionBegin;
2273 #endif
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2278 {
2279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2284   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2295   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303   PetscInt       i,*idxb = 0;
2304   PetscScalar    *va,*vb;
2305   Vec            vtmp;
2306 
2307   PetscFunctionBegin;
2308   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2309   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2310   if (idx) {
2311     for (i=0; i<A->rmap->n; i++) {
2312       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2313     }
2314   }
2315 
2316   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2317   if (idx) {
2318     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2319   }
2320   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2321   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2322 
2323   for (i=0; i<A->rmap->n; i++) {
2324     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2325       va[i] = vb[i];
2326       if (idx) idx[i] = a->garray[idxb[i]];
2327     }
2328   }
2329 
2330   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2331   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2332   ierr = PetscFree(idxb);CHKERRQ(ierr);
2333   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338 {
2339   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,*idxb = 0;
2342   PetscScalar    *va,*vb;
2343   Vec            vtmp;
2344 
2345   PetscFunctionBegin;
2346   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2347   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2348   if (idx) {
2349     for (i=0; i<A->rmap->n; i++) {
2350       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2351     }
2352   }
2353 
2354   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2355   if (idx) {
2356     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2357   }
2358   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2359   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2360 
2361   for (i=0; i<A->rmap->n; i++) {
2362     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2363       va[i] = vb[i];
2364       if (idx) idx[i] = a->garray[idxb[i]];
2365     }
2366   }
2367 
2368   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2369   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2370   ierr = PetscFree(idxb);CHKERRQ(ierr);
2371   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2378   PetscInt       n      = A->rmap->n;
2379   PetscInt       cstart = A->cmap->rstart;
2380   PetscInt       *cmap  = mat->garray;
2381   PetscInt       *diagIdx, *offdiagIdx;
2382   Vec            diagV, offdiagV;
2383   PetscScalar    *a, *diagA, *offdiagA;
2384   PetscInt       r;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2394   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2395   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < n; ++r) {
2397     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2398       a[r]   = diagA[r];
2399       idx[r] = cstart + diagIdx[r];
2400     } else {
2401       a[r]   = offdiagA[r];
2402       idx[r] = cmap[offdiagIdx[r]];
2403     }
2404   }
2405   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2415 {
2416   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2417   PetscInt       n      = A->rmap->n;
2418   PetscInt       cstart = A->cmap->rstart;
2419   PetscInt       *cmap  = mat->garray;
2420   PetscInt       *diagIdx, *offdiagIdx;
2421   Vec            diagV, offdiagV;
2422   PetscScalar    *a, *diagA, *offdiagA;
2423   PetscInt       r;
2424   PetscErrorCode ierr;
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2429   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2431   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2432   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2433   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2434   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2435   for (r = 0; r < n; ++r) {
2436     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2437       a[r]   = diagA[r];
2438       idx[r] = cstart + diagIdx[r];
2439     } else {
2440       a[r]   = offdiagA[r];
2441       idx[r] = cmap[offdiagIdx[r]];
2442     }
2443   }
2444   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2446   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2449   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2454 {
2455   PetscErrorCode ierr;
2456   Mat            *dummy;
2457 
2458   PetscFunctionBegin;
2459   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2460   *newmat = *dummy;
2461   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2466 {
2467   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2468   PetscErrorCode ierr;
2469 
2470   PetscFunctionBegin;
2471   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2472   A->factorerrortype = a->A->factorerrortype;
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2480 
2481   PetscFunctionBegin;
2482   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2483   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2484   if (x->assembled) {
2485     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2486   } else {
2487     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2488   }
2489   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2490   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2495 {
2496   PetscFunctionBegin;
2497   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2498   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 /*@
2503    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2504 
2505    Collective on Mat
2506 
2507    Input Parameters:
2508 +    A - the matrix
2509 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2510 
2511    Level: advanced
2512 
2513 @*/
2514 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2515 {
2516   PetscErrorCode       ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
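/*
   Note (added): a minimal usage sketch (not part of the original source; nis, is, and ov are
   illustrative); the command-line option name matches the one registered in
   MatSetFromOptions_MPIAIJ() below:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);
   or, from the command line:  -mat_increase_overlap_scalable
*/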
2522 
2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2524 {
2525   PetscErrorCode       ierr;
2526   PetscBool            sc = PETSC_FALSE,flg;
2527 
2528   PetscFunctionBegin;
2529   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2530   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2531   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2532   if (flg) {
2533     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2534   }
2535   ierr = PetscOptionsTail();CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2540 {
2541   PetscErrorCode ierr;
2542   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2543   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2544 
2545   PetscFunctionBegin;
2546   if (!Y->preallocated) {
2547     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2548   } else if (!aij->nz) {
2549     PetscInt nonew = aij->nonew;
2550     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2551     aij->nonew = nonew;
2552   }
2553   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2558 {
2559   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2560   PetscErrorCode ierr;
2561 
2562   PetscFunctionBegin;
2563   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2564   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2565   if (d) {
2566     PetscInt rstart;
2567     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2568     *d += rstart;
2569 
2570   }
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2575 {
2576   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /* -------------------------------------------------------------------*/
2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2586                                        MatGetRow_MPIAIJ,
2587                                        MatRestoreRow_MPIAIJ,
2588                                        MatMult_MPIAIJ,
2589                                 /* 4*/ MatMultAdd_MPIAIJ,
2590                                        MatMultTranspose_MPIAIJ,
2591                                        MatMultTransposeAdd_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*10*/ 0,
2596                                        0,
2597                                        0,
2598                                        MatSOR_MPIAIJ,
2599                                        MatTranspose_MPIAIJ,
2600                                 /*15*/ MatGetInfo_MPIAIJ,
2601                                        MatEqual_MPIAIJ,
2602                                        MatGetDiagonal_MPIAIJ,
2603                                        MatDiagonalScale_MPIAIJ,
2604                                        MatNorm_MPIAIJ,
2605                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2606                                        MatAssemblyEnd_MPIAIJ,
2607                                        MatSetOption_MPIAIJ,
2608                                        MatZeroEntries_MPIAIJ,
2609                                 /*24*/ MatZeroRows_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*29*/ MatSetUp_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        MatGetDiagonalBlock_MPIAIJ,
2618                                        0,
2619                                 /*34*/ MatDuplicate_MPIAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                 /*39*/ MatAXPY_MPIAIJ,
2625                                        MatCreateSubMatrices_MPIAIJ,
2626                                        MatIncreaseOverlap_MPIAIJ,
2627                                        MatGetValues_MPIAIJ,
2628                                        MatCopy_MPIAIJ,
2629                                 /*44*/ MatGetRowMax_MPIAIJ,
2630                                        MatScale_MPIAIJ,
2631                                        MatShift_MPIAIJ,
2632                                        MatDiagonalSet_MPIAIJ,
2633                                        MatZeroRowsColumns_MPIAIJ,
2634                                 /*49*/ MatSetRandom_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2640                                        0,
2641                                        MatSetUnfactored_MPIAIJ,
2642                                        MatPermute_MPIAIJ,
2643                                        0,
2644                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2645                                        MatDestroy_MPIAIJ,
2646                                        MatView_MPIAIJ,
2647                                        0,
2648                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2649                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2650                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2655                                        MatGetRowMinAbs_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*75*/ MatFDColoringApply_AIJ,
2661                                        MatSetFromOptions_MPIAIJ,
2662                                        0,
2663                                        0,
2664                                        MatFindZeroDiagonals_MPIAIJ,
2665                                 /*80*/ 0,
2666                                        0,
2667                                        0,
2668                                 /*83*/ MatLoad_MPIAIJ,
2669                                        MatIsSymmetric_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2675                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2676                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2677                                        MatPtAP_MPIAIJ_MPIAIJ,
2678                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2679                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatBindToCPU_MPIAIJ,
2684                                 /*99*/ 0,
2685                                        0,
2686                                        0,
2687                                        MatConjugate_MPIAIJ,
2688                                        0,
2689                                 /*104*/MatSetValuesRow_MPIAIJ,
2690                                        MatRealPart_MPIAIJ,
2691                                        MatImaginaryPart_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                 /*109*/0,
2695                                        0,
2696                                        MatGetRowMin_MPIAIJ,
2697                                        0,
2698                                        MatMissingDiagonal_MPIAIJ,
2699                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2700                                        0,
2701                                        MatGetGhosts_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                 /*119*/0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        MatGetMultiProcBlock_MPIAIJ,
2709                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2710                                        MatGetColumnNorms_MPIAIJ,
2711                                        MatInvertBlockDiagonal_MPIAIJ,
2712                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2713                                        MatCreateSubMatricesMPI_MPIAIJ,
2714                                 /*129*/0,
2715                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2716                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2717                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2718                                        0,
2719                                 /*134*/0,
2720                                        0,
2721                                        MatRARt_MPIAIJ_MPIAIJ,
2722                                        0,
2723                                        0,
2724                                 /*139*/MatSetBlockSizes_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                        MatFDColoringSetUp_MPIXAIJ,
2728                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2729                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2730 };
2731 
2732 /* ----------------------------------------------------------------------------------------*/
2733 
2734 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2735 {
2736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2737   PetscErrorCode ierr;
2738 
2739   PetscFunctionBegin;
2740   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2741   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2742   PetscFunctionReturn(0);
2743 }
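
/* A rough usage sketch of the public MatStoreValues()/MatRetrieveValues() pair, which forwards
   here for MATMPIAIJ (error checking omitted; the nonzero pattern must be frozen first):
       MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
       MatStoreValues(mat);        -- snapshot the current numerical values
       ...                         -- e.g. shift or otherwise modify the values
       MatRetrieveValues(mat);     -- restore the snapshot into the same nonzero slots
*/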
2744 
2745 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2757 {
2758   Mat_MPIAIJ     *b;
2759   PetscErrorCode ierr;
2760   PetscMPIInt    size;
2761 
2762   PetscFunctionBegin;
2763   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2765   b = (Mat_MPIAIJ*)B->data;
2766 
2767 #if defined(PETSC_USE_CTABLE)
2768   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2769 #else
2770   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2771 #endif
2772   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2773   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2774   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2775 
2776   /* Because B will have been resized we simply destroy it and create a new one each time */
2777   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2778   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2779   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2780   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2781   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2782   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2783   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2784 
2785   if (!B->preallocated) {
2786     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2787     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2788     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2791   }
2792 
2793   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2794   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2795   B->preallocated  = PETSC_TRUE;
2796   B->was_assembled = PETSC_FALSE;
2797   B->assembled     = PETSC_FALSE;
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2802 {
2803   Mat_MPIAIJ     *b;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2808   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2809   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2810   b = (Mat_MPIAIJ*)B->data;
2811 
2812 #if defined(PETSC_USE_CTABLE)
2813   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2814 #else
2815   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2816 #endif
2817   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2818   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2819   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2820 
2821   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2822   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2823   B->preallocated  = PETSC_TRUE;
2824   B->was_assembled = PETSC_FALSE;
2825   B->assembled = PETSC_FALSE;
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2830 {
2831   Mat            mat;
2832   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2833   PetscErrorCode ierr;
2834 
2835   PetscFunctionBegin;
2836   *newmat = 0;
2837   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2838   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2839   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2840   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2841   a       = (Mat_MPIAIJ*)mat->data;
2842 
2843   mat->factortype   = matin->factortype;
2844   mat->assembled    = matin->assembled;
2845   mat->insertmode   = NOT_SET_VALUES;
2846   mat->preallocated = matin->preallocated;
2847 
2848   a->size         = oldmat->size;
2849   a->rank         = oldmat->rank;
2850   a->donotstash   = oldmat->donotstash;
2851   a->roworiented  = oldmat->roworiented;
2852   a->rowindices   = NULL;
2853   a->rowvalues    = NULL;
2854   a->getrowactive = PETSC_FALSE;
2855 
2856   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2857   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2858 
2859   if (oldmat->colmap) {
2860 #if defined(PETSC_USE_CTABLE)
2861     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2862 #else
2863     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2864     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2865     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2866 #endif
2867   } else a->colmap = NULL;
2868   if (oldmat->garray) {
2869     PetscInt len;
2870     len  = oldmat->B->cmap->n;
2871     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2872     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2873     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2874   } else a->garray = NULL;
2875 
2876   /* It may happen MatDuplicate is called with a non-assembled matrix
2877      In fact, MatDuplicate only requires the matrix to be preallocated
2878      This may happen inside a DMCreateMatrix_Shell */
2879   if (oldmat->lvec) {
2880     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2881     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2882   }
2883   if (oldmat->Mvctx) {
2884     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2885     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2886   }
2887   if (oldmat->Mvctx_mpi1) {
2888     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2890   }
2891 
2892   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2893   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2894   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2895   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2896   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2897   *newmat = mat;
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2902 {
2903   PetscBool      isbinary, ishdf5;
2904   PetscErrorCode ierr;
2905 
2906   PetscFunctionBegin;
2907   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2908   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2909   /* force binary viewer to load .info file if it has not yet done so */
2910   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2911   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2912   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2913   if (isbinary) {
2914     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2915   } else if (ishdf5) {
2916 #if defined(PETSC_HAVE_HDF5)
2917     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2918 #else
2919     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2920 #endif
2921   } else {
2922     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2923   }
2924   PetscFunctionReturn(0);
2925 }
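
/* A minimal loading sketch (hypothetical file name; error checking omitted):
       PetscViewer viewer;
       Mat         A;
       PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetType(A,MATMPIAIJ);
       MatLoad(A,viewer);
       PetscViewerDestroy(&viewer);
   An HDF5 viewer can be used instead when PETSc is configured with HDF5 support. */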
2926 
2927 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2928 {
2929   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2930   PetscInt       *rowidxs,*colidxs;
2931   PetscScalar    *matvals;
2932   PetscErrorCode ierr;
2933 
2934   PetscFunctionBegin;
2935   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2936 
2937   /* read in matrix header */
2938   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2939   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2940   M  = header[1]; N = header[2]; nz = header[3];
2941   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
2942   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
2943   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2944 
2945   /* set block sizes from the viewer's .info file */
2946   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
2947   /* set global sizes if not set already */
2948   if (mat->rmap->N < 0) mat->rmap->N = M;
2949   if (mat->cmap->N < 0) mat->cmap->N = N;
2950   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
2951   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
2952 
2953   /* check if the matrix sizes are correct */
2954   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2955   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
2956 
2957   /* read in row lengths and build row indices */
2958   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
2959   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
2960   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
2961   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2962   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
2963   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
2964   /* read in column indices and matrix values */
2965   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
2966   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
2967   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
2968   /* store matrix indices and values */
2969   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
2970   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
2971   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 /* Not scalable because of ISAllGather() unless getting all columns. */
2976 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2977 {
2978   PetscErrorCode ierr;
2979   IS             iscol_local;
2980   PetscBool      isstride;
2981   PetscMPIInt    lisstride=0,gisstride;
2982 
2983   PetscFunctionBegin;
2984   /* check if we are grabbing all columns */
2985   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2986 
2987   if (isstride) {
2988     PetscInt  start,len,mstart,mlen;
2989     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2990     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2991     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2992     if (mstart == start && mlen-mstart == len) lisstride = 1;
2993   }
2994 
2995   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2996   if (gisstride) {
2997     PetscInt N;
2998     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2999     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3000     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3001     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3002   } else {
3003     PetscInt cbs;
3004     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3005     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3006     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3007   }
3008 
3009   *isseq = iscol_local;
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 /*
3014  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3015  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3016 
3017  Input Parameters:
3018    mat - matrix
3019    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3020            i.e., mat->rstart <= isrow[i] < mat->rend
3021    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3022            i.e., mat->cstart <= iscol[i] < mat->cend
3023  Output Parameters:
3024    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3025    iscol_o - sequential column index set for retrieving mat->B
3026    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3027  */
3028 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3029 {
3030   PetscErrorCode ierr;
3031   Vec            x,cmap;
3032   const PetscInt *is_idx;
3033   PetscScalar    *xarray,*cmaparray;
3034   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3035   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3036   Mat            B=a->B;
3037   Vec            lvec=a->lvec,lcmap;
3038   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3039   MPI_Comm       comm;
3040   VecScatter     Mvctx=a->Mvctx;
3041 
3042   PetscFunctionBegin;
3043   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3044   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3045 
3046   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3047   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3048   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3049   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3050   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3051 
3052   /* Get start indices */
3053   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3054   isstart -= ncols;
3055   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3056 
3057   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3058   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3059   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3060   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3061   for (i=0; i<ncols; i++) {
3062     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3063     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3064     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3065   }
3066   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3067   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3068   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3069 
3070   /* Get iscol_d */
3071   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3072   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3073   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3074 
3075   /* Get isrow_d */
3076   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3077   rstart = mat->rmap->rstart;
3078   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3079   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3080   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3081   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3082 
3083   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3084   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3085   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3086 
3087   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3088   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3089   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3090 
3091   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3092 
3093   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3094   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3095 
3096   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3097   /* off-process column indices */
3098   count = 0;
3099   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3100   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3101 
3102   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3103   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3104   for (i=0; i<Bn; i++) {
3105     if (PetscRealPart(xarray[i]) > -1.0) {
3106       idx[count]     = i;                   /* local column index in off-diagonal part B */
3107       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3108       count++;
3109     }
3110   }
3111   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3112   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3113 
3114   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3115   /* cannot ensure iscol_o has same blocksize as iscol! */
3116 
3117   ierr = PetscFree(idx);CHKERRQ(ierr);
3118   *garray = cmap1;
3119 
3120   ierr = VecDestroy(&x);CHKERRQ(ierr);
3121   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3122   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3123   PetscFunctionReturn(0);
3124 }
3125 
3126 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3127 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3128 {
3129   PetscErrorCode ierr;
3130   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3131   Mat            M = NULL;
3132   MPI_Comm       comm;
3133   IS             iscol_d,isrow_d,iscol_o;
3134   Mat            Asub = NULL,Bsub = NULL;
3135   PetscInt       n;
3136 
3137   PetscFunctionBegin;
3138   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3139 
3140   if (call == MAT_REUSE_MATRIX) {
3141     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3142     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3143     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3144 
3145     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3146     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3147 
3148     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3149     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3150 
3151     /* Update diagonal and off-diagonal portions of submat */
3152     asub = (Mat_MPIAIJ*)(*submat)->data;
3153     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3154     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3155     if (n) {
3156       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3157     }
3158     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160 
3161   } else { /* call == MAT_INITIAL_MATRIX */
3162     const PetscInt *garray;
3163     PetscInt        BsubN;
3164 
3165     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3166     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3167 
3168     /* Create local submatrices Asub and Bsub */
3169     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3170     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3171 
3172     /* Create submatrix M */
3173     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3174 
3175     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3176     asub = (Mat_MPIAIJ*)M->data;
3177 
3178     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3179     n = asub->B->cmap->N;
3180     if (BsubN > n) {
3181       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3182       const PetscInt *idx;
3183       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3184       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3185 
3186       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3187       j = 0;
3188       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3189       for (i=0; i<n; i++) {
3190         if (j >= BsubN) break;
3191         while (subgarray[i] > garray[j]) j++;
3192 
3193         if (subgarray[i] == garray[j]) {
3194           idx_new[i] = idx[j++];
3195         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3196       }
3197       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3198 
3199       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3200       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3201 
3202     } else if (BsubN < n) {
3203       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3204     }
3205 
3206     ierr = PetscFree(garray);CHKERRQ(ierr);
3207     *submat = M;
3208 
3209     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3210     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3211     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3212 
3213     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3214     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3215 
3216     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3217     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3218   }
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3223 {
3224   PetscErrorCode ierr;
3225   IS             iscol_local=NULL,isrow_d;
3226   PetscInt       csize;
3227   PetscInt       n,i,j,start,end;
3228   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3229   MPI_Comm       comm;
3230 
3231   PetscFunctionBegin;
3232   /* If isrow has same processor distribution as mat,
3233      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3234   if (call == MAT_REUSE_MATRIX) {
3235     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3236     if (isrow_d) {
3237       sameRowDist  = PETSC_TRUE;
3238       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3239     } else {
3240       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3241       if (iscol_local) {
3242         sameRowDist  = PETSC_TRUE;
3243         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3244       }
3245     }
3246   } else {
3247     /* Check if isrow has same processor distribution as mat */
3248     sameDist[0] = PETSC_FALSE;
3249     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3250     if (!n) {
3251       sameDist[0] = PETSC_TRUE;
3252     } else {
3253       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3254       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3255       if (i >= start && j < end) {
3256         sameDist[0] = PETSC_TRUE;
3257       }
3258     }
3259 
3260     /* Check if iscol has same processor distribution as mat */
3261     sameDist[1] = PETSC_FALSE;
3262     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3263     if (!n) {
3264       sameDist[1] = PETSC_TRUE;
3265     } else {
3266       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3267       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3268       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3269     }
3270 
3271     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3272     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3273     sameRowDist = tsameDist[0];
3274   }
3275 
3276   if (sameRowDist) {
3277     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3278       /* isrow and iscol have same processor distribution as mat */
3279       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3280       PetscFunctionReturn(0);
3281     } else { /* sameRowDist */
3282       /* isrow has same processor distribution as mat */
3283       if (call == MAT_INITIAL_MATRIX) {
3284         PetscBool sorted;
3285         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3286         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3287         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3288         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3289 
3290         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3291         if (sorted) {
3292           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3293           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3294           PetscFunctionReturn(0);
3295         }
3296       } else { /* call == MAT_REUSE_MATRIX */
3297         IS    iscol_sub;
3298         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3299         if (iscol_sub) {
3300           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3301           PetscFunctionReturn(0);
3302         }
3303       }
3304     }
3305   }
3306 
3307   /* General case: iscol -> iscol_local which has global size of iscol */
3308   if (call == MAT_REUSE_MATRIX) {
3309     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3310     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3311   } else {
3312     if (!iscol_local) {
3313       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3314     }
3315   }
3316 
3317   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3318   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3319 
3320   if (call == MAT_INITIAL_MATRIX) {
3321     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3322     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3323   }
3324   PetscFunctionReturn(0);
3325 }
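
/* A caller-side sketch of the public MatCreateSubMatrix() interface that this routine implements
   for MATMPIAIJ (isrow/iscol and nlocal/first are hypothetical; error checking omitted):
       IS  isrow,iscol;
       Mat sub;
       ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);
       ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&iscol);
       MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
       ...                                  -- later, with the same index sets and layout:
       MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);
*/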
3326 
3327 /*@C
3328      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3329          and "off-diagonal" part of the matrix in CSR format.
3330 
3331    Collective
3332 
3333    Input Parameters:
3334 +  comm - MPI communicator
3335 .  A - "diagonal" portion of matrix
3336 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3337 -  garray - global index of B columns
3338 
3339    Output Parameter:
3340 .   mat - the matrix, with input A as its local diagonal matrix
3341    Level: advanced
3342 
3343    Notes:
3344        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3345        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3346 
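   Example Usage:
   A rough sketch only; Asub, Bsub, and garray are assumed to have been obtained by splitting an
   existing parallel matrix into its sequential "diagonal" and "off-diagonal" blocks (as done in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist()), with garray[i] giving the global column of
   column i of Bsub:
$     Mat M;
$     MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);
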
3347 .seealso: MatCreateMPIAIJWithSplitArrays()
3348 @*/
3349 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3350 {
3351   PetscErrorCode ierr;
3352   Mat_MPIAIJ     *maij;
3353   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3354   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3355   PetscScalar    *oa=b->a;
3356   Mat            Bnew;
3357   PetscInt       m,n,N;
3358 
3359   PetscFunctionBegin;
3360   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3361   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3362   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3363   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3364   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3365   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3366 
3367   /* Get global columns of mat */
3368   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3369 
3370   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3371   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3372   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3373   maij = (Mat_MPIAIJ*)(*mat)->data;
3374 
3375   (*mat)->preallocated = PETSC_TRUE;
3376 
3377   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3378   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3379 
3380   /* Set A as diagonal portion of *mat */
3381   maij->A = A;
3382 
3383   nz = oi[m];
3384   for (i=0; i<nz; i++) {
3385     col   = oj[i];
3386     oj[i] = garray[col];
3387   }
3388 
3389    /* Set Bnew as off-diagonal portion of *mat */
3390   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3391   bnew        = (Mat_SeqAIJ*)Bnew->data;
3392   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3393   maij->B     = Bnew;
3394 
3395   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3396 
3397   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3398   b->free_a       = PETSC_FALSE;
3399   b->free_ij      = PETSC_FALSE;
3400   ierr = MatDestroy(&B);CHKERRQ(ierr);
3401 
3402   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3403   bnew->free_a       = PETSC_TRUE;
3404   bnew->free_ij      = PETSC_TRUE;
3405 
3406   /* condense columns of maij->B */
3407   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3408   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3409   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3410   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3411   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3416 
3417 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3418 {
3419   PetscErrorCode ierr;
3420   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3421   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3422   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3423   Mat            M,Msub,B=a->B;
3424   MatScalar      *aa;
3425   Mat_SeqAIJ     *aij;
3426   PetscInt       *garray = a->garray,*colsub,Ncols;
3427   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3428   IS             iscol_sub,iscmap;
3429   const PetscInt *is_idx,*cmap;
3430   PetscBool      allcolumns=PETSC_FALSE;
3431   MPI_Comm       comm;
3432 
3433   PetscFunctionBegin;
3434   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3435 
3436   if (call == MAT_REUSE_MATRIX) {
3437     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3438     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3439     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3440 
3441     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3442     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3443 
3444     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3445     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3446 
3447     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3448 
3449   } else { /* call == MAT_INITIAL_MATRIX */
3450     PetscBool flg;
3451 
3452     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3453     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3454 
3455     /* (1) iscol -> nonscalable iscol_local */
3456     /* Check for special case: each processor gets entire matrix columns */
3457     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3458     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3459     if (allcolumns) {
3460       iscol_sub = iscol_local;
3461       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3462       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3463 
3464     } else {
3465       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted; it can have duplicate indices */
3466       PetscInt *idx,*cmap1,k;
3467       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3468       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3469       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3470       count = 0;
3471       k     = 0;
3472       for (i=0; i<Ncols; i++) {
3473         j = is_idx[i];
3474         if (j >= cstart && j < cend) {
3475           /* diagonal part of mat */
3476           idx[count]     = j;
3477           cmap1[count++] = i; /* column index in submat */
3478         } else if (Bn) {
3479           /* off-diagonal part of mat */
3480           if (j == garray[k]) {
3481             idx[count]     = j;
3482             cmap1[count++] = i;  /* column index in submat */
3483           } else if (j > garray[k]) {
3484             while (j > garray[k] && k < Bn-1) k++;
3485             if (j == garray[k]) {
3486               idx[count]     = j;
3487               cmap1[count++] = i; /* column index in submat */
3488             }
3489           }
3490         }
3491       }
3492       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3493 
3494       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3495       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3496       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3497 
3498       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3499     }
3500 
3501     /* (3) Create sequential Msub */
3502     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3503   }
3504 
3505   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3506   aij  = (Mat_SeqAIJ*)(Msub)->data;
3507   ii   = aij->i;
3508   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3509 
3510   /*
3511       m - number of local rows
3512       Ncols - number of columns (same on all processors)
3513       rstart - first row in new global matrix generated
3514   */
3515   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3516 
3517   if (call == MAT_INITIAL_MATRIX) {
3518     /* (4) Create parallel newmat */
3519     PetscMPIInt    rank,size;
3520     PetscInt       csize;
3521 
3522     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3523     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3524 
3525     /*
3526         Determine the number of non-zeros in the diagonal and off-diagonal
3527         portions of the matrix in order to do correct preallocation
3528     */
3529 
3530     /* first get start and end of "diagonal" columns */
3531     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3532     if (csize == PETSC_DECIDE) {
3533       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3534       if (mglobal == Ncols) { /* square matrix */
3535         nlocal = m;
3536       } else {
3537         nlocal = Ncols/size + ((Ncols % size) > rank);
3538       }
3539     } else {
3540       nlocal = csize;
3541     }
3542     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3543     rstart = rend - nlocal;
3544     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3545 
3546     /* next, compute all the lengths */
3547     jj    = aij->j;
3548     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3549     olens = dlens + m;
3550     for (i=0; i<m; i++) {
3551       jend = ii[i+1] - ii[i];
3552       olen = 0;
3553       dlen = 0;
3554       for (j=0; j<jend; j++) {
3555         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3556         else dlen++;
3557         jj++;
3558       }
3559       olens[i] = olen;
3560       dlens[i] = dlen;
3561     }
3562 
3563     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3564     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3565 
3566     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3567     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3568     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3569     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3570     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3571     ierr = PetscFree(dlens);CHKERRQ(ierr);
3572 
3573   } else { /* call == MAT_REUSE_MATRIX */
3574     M    = *newmat;
3575     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3576     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3577     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3578     /*
3579          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3580        rather than the slower MatSetValues().
3581     */
3582     M->was_assembled = PETSC_TRUE;
3583     M->assembled     = PETSC_FALSE;
3584   }
3585 
3586   /* (5) Set values of Msub to *newmat */
3587   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3588   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3589 
3590   jj   = aij->j;
3591   aa   = aij->a;
3592   for (i=0; i<m; i++) {
3593     row = rstart + i;
3594     nz  = ii[i+1] - ii[i];
3595     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3596     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3597     jj += nz; aa += nz;
3598   }
3599   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3600 
3601   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3603 
3604   ierr = PetscFree(colsub);CHKERRQ(ierr);
3605 
3606   /* save Msub, iscol_sub and iscmap used in processor for next request */
3607   if (call ==  MAT_INITIAL_MATRIX) {
3608     *newmat = M;
3609     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3610     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3611 
3612     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3613     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3614 
3615     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3616     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3617 
3618     if (iscol_local) {
3619       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3620       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3621     }
3622   }
3623   PetscFunctionReturn(0);
3624 }
3625 
3626 /*
3627     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3628   on each process, and then the final result by concatenating the local matrices.
3629   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3630 
3631   Note: This requires a sequential iscol with all indices.
3632 */
3633 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3634 {
3635   PetscErrorCode ierr;
3636   PetscMPIInt    rank,size;
3637   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3638   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3639   Mat            M,Mreuse;
3640   MatScalar      *aa,*vwork;
3641   MPI_Comm       comm;
3642   Mat_SeqAIJ     *aij;
3643   PetscBool      colflag,allcolumns=PETSC_FALSE;
3644 
3645   PetscFunctionBegin;
3646   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3647   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3649 
3650   /* Check for special case: each processor gets entire matrix columns */
3651   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3652   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3653   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3654 
3655   if (call ==  MAT_REUSE_MATRIX) {
3656     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3657     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3658     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3659   } else {
3660     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3661   }
3662 
3663   /*
3664       m - number of local rows
3665       n - number of columns (same on all processors)
3666       rstart - first row in new global matrix generated
3667   */
3668   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3669   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3670   if (call == MAT_INITIAL_MATRIX) {
3671     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3672     ii  = aij->i;
3673     jj  = aij->j;
3674 
3675     /*
3676         Determine the number of non-zeros in the diagonal and off-diagonal
3677         portions of the matrix in order to do correct preallocation
3678     */
3679 
3680     /* first get start and end of "diagonal" columns */
3681     if (csize == PETSC_DECIDE) {
3682       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3683       if (mglobal == n) { /* square matrix */
3684         nlocal = m;
3685       } else {
3686         nlocal = n/size + ((n % size) > rank);
3687       }
3688     } else {
3689       nlocal = csize;
3690     }
3691     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3692     rstart = rend - nlocal;
3693     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3694 
3695     /* next, compute all the lengths */
3696     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3697     olens = dlens + m;
3698     for (i=0; i<m; i++) {
3699       jend = ii[i+1] - ii[i];
3700       olen = 0;
3701       dlen = 0;
3702       for (j=0; j<jend; j++) {
3703         if (*jj < rstart || *jj >= rend) olen++;
3704         else dlen++;
3705         jj++;
3706       }
3707       olens[i] = olen;
3708       dlens[i] = dlen;
3709     }
3710     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3711     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3712     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3713     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3714     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3715     ierr = PetscFree(dlens);CHKERRQ(ierr);
3716   } else {
3717     PetscInt ml,nl;
3718 
3719     M    = *newmat;
3720     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3721     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3722     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3723     /*
3724          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3725        rather than the slower MatSetValues().
3726     */
3727     M->was_assembled = PETSC_TRUE;
3728     M->assembled     = PETSC_FALSE;
3729   }
3730   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3731   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3732   ii   = aij->i;
3733   jj   = aij->j;
3734   aa   = aij->a;
3735   for (i=0; i<m; i++) {
3736     row   = rstart + i;
3737     nz    = ii[i+1] - ii[i];
3738     cwork = jj;     jj += nz;
3739     vwork = aa;     aa += nz;
3740     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3741   }
3742 
3743   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3745   *newmat = M;
3746 
3747   /* save submatrix used in processor for next request */
3748   if (call ==  MAT_INITIAL_MATRIX) {
3749     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3750     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3751   }
3752   PetscFunctionReturn(0);
3753 }
3754 
3755 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3756 {
3757   PetscInt       m,cstart, cend,j,nnz,i,d;
3758   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3759   const PetscInt *JJ;
3760   PetscErrorCode ierr;
3761   PetscBool      nooffprocentries;
3762 
3763   PetscFunctionBegin;
3764   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3765 
3766   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3767   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3768   m      = B->rmap->n;
3769   cstart = B->cmap->rstart;
3770   cend   = B->cmap->rend;
3771   rstart = B->rmap->rstart;
3772 
3773   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3774 
3775 #if defined(PETSC_USE_DEBUG)
3776   for (i=0; i<m; i++) {
3777     nnz = Ii[i+1]- Ii[i];
3778     JJ  = J + Ii[i];
3779     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3780     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3781     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3782   }
3783 #endif
3784 
3785   for (i=0; i<m; i++) {
3786     nnz     = Ii[i+1]- Ii[i];
3787     JJ      = J + Ii[i];
3788     nnz_max = PetscMax(nnz_max,nnz);
3789     d       = 0;
3790     for (j=0; j<nnz; j++) {
3791       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3792     }
3793     d_nnz[i] = d;
3794     o_nnz[i] = nnz - d;
3795   }
3796   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3797   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3798 
3799   for (i=0; i<m; i++) {
3800     ii   = i + rstart;
3801     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3802   }
3803   nooffprocentries    = B->nooffprocentries;
3804   B->nooffprocentries = PETSC_TRUE;
3805   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3806   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3807   B->nooffprocentries = nooffprocentries;
3808 
3809   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 /*@
3814    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3815    (the default parallel PETSc format).
3816 
3817    Collective
3818 
3819    Input Parameters:
3820 +  B - the matrix
3821 .  i - the indices into j for the start of each local row (starts with zero)
3822 .  j - the column indices for each local row (starts with zero)
3823 -  v - optional values in the matrix
3824 
3825    Level: developer
3826 
3827    Notes:
3828        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3829      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3830      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3831 
3832        The i and j indices are 0 based, and the values in i are offsets into the local j (and v) arrays.
3833 
3834        The format used for the sparse matrix input is equivalent to a
3835     row-major ordering, i.e. for the following matrix, the input data expected is
3836     as shown:
3837 
3838 $        1 0 0
3839 $        2 0 3     P0
3840 $       -------
3841 $        4 5 6     P1
3842 $
3843 $     Process0 [P0]: rows_owned=[0,1]
3844 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3845 $        j =  {0,0,2}  [size = 3]
3846 $        v =  {1,2,3}  [size = 3]
3847 $
3848 $     Process1 [P1]: rows_owned=[2]
3849 $        i =  {0,3}    [size = nrow+1  = 1+1]
3850 $        j =  {0,1,2}  [size = 3]
3851 $        v =  {4,5,6}  [size = 3]
3852 
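     As a rough sketch only (comm, the use of MATMPIAIJ via MatSetType(), and the local sizes are
     taken from the two-process example above and are illustrative, not requirements of this routine),
     process 0 could build its part of the matrix with:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
     Here MatSetSizes() declares the 2 local rows of the 3x3 matrix, and the final call both
     preallocates the matrix and inserts the values.
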
3853 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3854           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3855 @*/
3856 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3857 {
3858   PetscErrorCode ierr;
3859 
3860   PetscFunctionBegin;
3861   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 /*@C
3866    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3867    (the default parallel PETSc format).  For good matrix assembly performance
3868    the user should preallocate the matrix storage by setting the parameters
3869    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3870    performance can be increased by more than a factor of 50.
3871 
3872    Collective
3873 
3874    Input Parameters:
3875 +  B - the matrix
3876 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3877            (same value is used for all local rows)
3878 .  d_nnz - array containing the number of nonzeros in the various rows of the
3879            DIAGONAL portion of the local submatrix (possibly different for each row)
3880            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3881            The size of this array is equal to the number of local rows, i.e 'm'.
3882            For matrices that will be factored, you must leave room for (and set)
3883            the diagonal entry even if it is zero.
3884 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3885            submatrix (same value is used for all local rows).
3886 -  o_nnz - array containing the number of nonzeros in the various rows of the
3887            OFF-DIAGONAL portion of the local submatrix (possibly different for
3888            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3889            structure. The size of this array is equal to the number
3890            of local rows, i.e 'm'.
3891 
3892    If the *_nnz parameter is given then the *_nz parameter is ignored
3893 
3894    The AIJ format (also called the Yale sparse matrix format or
3895    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3896    storage.  The stored row and column indices begin with zero.
3897    See Users-Manual: ch_mat for details.
3898 
3899    The parallel matrix is partitioned such that the first m0 rows belong to
3900    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3901    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3902 
3903    The DIAGONAL portion of the local submatrix of a processor can be defined
3904    as the submatrix obtained by extracting the part corresponding to
3905    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3906    first row that belongs to the processor, r2 is the last row belonging to
3907    this processor, and c1-c2 is the range of indices of the local part of a
3908    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3909    common case of a square matrix, the row and column ranges are the same and
3910    the DIAGONAL part is also square. The remaining portion of the local
3911    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3912 
3913    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3914 
3915    You can call MatGetInfo() to get information on how effective the preallocation was;
3916    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3917    You can also run with the option -info and look for messages with the string
3918    malloc in them to see if additional memory allocation was needed.
3919 
3920    Example usage:
3921 
3922    Consider the following 8x8 matrix with 34 non-zero values, that is
3923    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3924    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3925    as follows:
3926 
3927 .vb
3928             1  2  0  |  0  3  0  |  0  4
3929     Proc0   0  5  6  |  7  0  0  |  8  0
3930             9  0 10  | 11  0  0  | 12  0
3931     -------------------------------------
3932            13  0 14  | 15 16 17  |  0  0
3933     Proc1   0 18  0  | 19 20 21  |  0  0
3934             0  0  0  | 22 23  0  | 24  0
3935     -------------------------------------
3936     Proc2  25 26 27  |  0  0 28  | 29  0
3937            30  0  0  | 31 32 33  |  0 34
3938 .ve
3939 
3940    This can be represented as a collection of submatrices as:
3941 
3942 .vb
3943       A B C
3944       D E F
3945       G H I
3946 .ve
3947 
3948    Where the submatrices A,B,C are owned by proc0, D,E,F are
3949    owned by proc1, G,H,I are owned by proc2.
3950 
3951    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3952    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3953    The 'M','N' parameters are 8,8, and have the same values on all procs.
3954 
3955    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3956    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3957    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3958    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3959    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3960    matrix, and [DF] as another SeqAIJ matrix.
3961 
3962    When d_nz, o_nz parameters are specified, d_nz storage elements are
3963    allocated for every row of the local diagonal submatrix, and o_nz
3964    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3965    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3966    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3967    In this case, the values of d_nz,o_nz are:
3968 .vb
3969      proc0 : dnz = 2, o_nz = 2
3970      proc1 : dnz = 3, o_nz = 2
3971      proc2 : dnz = 1, o_nz = 4
3972 .ve
3973    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3974    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3975    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3976    34 values.
3977 
3978    When d_nnz, o_nnz parameters are specified, the storage is specified
3979    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3980    In the above case the values for d_nnz,o_nnz are:
3981 .vb
3982      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3983      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3984      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3985 .ve
3986    Here the space allocated is the sum of all the above values, i.e. 34, and
3987    hence the preallocation is perfect.
3988 
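   As a sketch only (comm and the error-checking style follow the usual PETSc conventions and are
   assumed here), process 0 in the example above could preallocate with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   where the per-row counts are exactly the proc0 values listed above, and MatSetSizes() declares
   its 3 local rows and columns of the 8x8 matrix.
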
3989    Level: intermediate
3990 
3991 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3992           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3993 @*/
3994 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3995 {
3996   PetscErrorCode ierr;
3997 
3998   PetscFunctionBegin;
3999   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4000   PetscValidType(B,1);
4001   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4002   PetscFunctionReturn(0);
4003 }
4004 
4005 /*@
4006      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4007          CSR format.
4008 
4009    Collective
4010 
4011    Input Parameters:
4012 +  comm - MPI communicator
4013 .  m - number of local rows (Cannot be PETSC_DECIDE)
4014 .  n - This value should be the same as the local size used in creating the
4015        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4016        calculated if N is given) For square matrices n is almost always m.
4017 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4018 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4019 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4020 .   j - column indices
4021 -   a - matrix values
4022 
4023    Output Parameter:
4024 .   mat - the matrix
4025 
4026    Level: intermediate
4027 
4028    Notes:
4029        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4030      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4031      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4032 
4033        The i and j indices are 0 based, and the values in i are offsets into the local j (and a) arrays.
4034 
4035        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4036 
4037        The format used for the sparse matrix input is equivalent to a
4038     row-major ordering, i.e. for the following matrix, the input data expected is
4039     as shown:
4040 
4041 $        1 0 0
4042 $        2 0 3     P0
4043 $       -------
4044 $        4 5 6     P1
4045 $
4046 $     Process0 [P0]: rows_owned=[0,1]
4047 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4048 $        j =  {0,0,2}  [size = 3]
4049 $        v =  {1,2,3}  [size = 3]
4050 $
4051 $     Process1 [P1]: rows_owned=[2]
4052 $        i =  {0,3}    [size = nrow+1  = 1+1]
4053 $        j =  {0,1,2}  [size = 3]
4054 $        v =  {4,5,6}  [size = 3]
4055 
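     As a sketch (comm and the sizes come from the example above; each process passes its own
     local CSR arrays, shown here for process 0), the whole parallel matrix can be created in a
     single collective call:

.vb
     Mat         mat;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&mat);CHKERRQ(ierr);
.ve
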
4056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4057           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4058 @*/
4059 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4060 {
4061   PetscErrorCode ierr;
4062 
4063   PetscFunctionBegin;
4064   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4065   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4066   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4067   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4068   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4069   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4070   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4071   PetscFunctionReturn(0);
4072 }
4073 
4074 /*@
4075      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4076          CSR format. Only the numerical values are updated; the other arrays must be identical to those passed when the matrix was created
4077 
4078    Collective
4079 
4080    Input Parameters:
4081 +  mat - the matrix
4082 .  m - number of local rows (Cannot be PETSC_DECIDE)
4083 .  n - This value should be the same as the local size used in creating the
4084        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4085        calculated if N is given) For square matrices n is almost always m.
4086 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4087 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4088 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4089 .  J - column indices
4090 -  v - matrix values
4091 
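   Notes:
     A sketch of a typical update, reusing the i and j arrays that were passed to
     MatCreateMPIAIJWithArrays() (mat, i, and j are assumed to come from that earlier call;
     only the values change, and the local sizes are queried so they match the existing layout):

.vb
     PetscInt    mloc,nloc;
     PetscScalar vnew[] = {10.0,20.0,30.0};

     ierr = MatGetLocalSize(mat,&mloc,&nloc);CHKERRQ(ierr);
     ierr = MatUpdateMPIAIJWithArrays(mat,mloc,nloc,3,3,i,j,vnew);CHKERRQ(ierr);
.ve
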
4092    Level: intermediate
4093 
4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4095           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4096 @*/
4097 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4098 {
4099   PetscErrorCode ierr;
4100   PetscInt       cstart,nnz,i,j;
4101   PetscInt       *ld;
4102   PetscBool      nooffprocentries;
4103   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4104   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4105   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4106   const PetscInt *Adi = Ad->i;
4107   PetscInt       ldi,Iii,md;
4108 
4109   PetscFunctionBegin;
4110   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4111   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4112   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4113   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4114 
4115   cstart = mat->cmap->rstart;
4116   if (!Aij->ld) {
4117     /* count number of entries below block diagonal */
4118     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4119     Aij->ld = ld;
4120     for (i=0; i<m; i++) {
4121       nnz  = Ii[i+1]- Ii[i];
4122       j     = 0;
4123       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so J[j] is never read past the end of this row */
4124       J    += nnz;
4125       ld[i] = j;
4126     }
4127   } else {
4128     ld = Aij->ld;
4129   }
4130 
4131   for (i=0; i<m; i++) {
4132     nnz  = Ii[i+1]- Ii[i];
4133     Iii  = Ii[i];
4134     ldi  = ld[i];
4135     md   = Adi[i+1]-Adi[i];
4136     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4137     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4138     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4139     ad  += md;
4140     ao  += nnz - md;
4141   }
4142   nooffprocentries      = mat->nooffprocentries;
4143   mat->nooffprocentries = PETSC_TRUE;
4144   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4145   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4146   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4147   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4148   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149   mat->nooffprocentries = nooffprocentries;
4150   PetscFunctionReturn(0);
4151 }
4152 
4153 /*@C
4154    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4155    (the default parallel PETSc format).  For good matrix assembly performance
4156    the user should preallocate the matrix storage by setting the parameters
4157    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4158    performance can be increased by more than a factor of 50.
4159 
4160    Collective
4161 
4162    Input Parameters:
4163 +  comm - MPI communicator
4164 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4165            This value should be the same as the local size used in creating the
4166            y vector for the matrix-vector product y = Ax.
4167 .  n - This value should be the same as the local size used in creating the
4168        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4169        calculated if N is given) For square matrices n is almost always m.
4170 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4171 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4172 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4173            (same value is used for all local rows)
4174 .  d_nnz - array containing the number of nonzeros in the various rows of the
4175            DIAGONAL portion of the local submatrix (possibly different for each row)
4176            or NULL, if d_nz is used to specify the nonzero structure.
4177            The size of this array is equal to the number of local rows, i.e 'm'.
4178 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4179            submatrix (same value is used for all local rows).
4180 -  o_nnz - array containing the number of nonzeros in the various rows of the
4181            OFF-DIAGONAL portion of the local submatrix (possibly different for
4182            each row) or NULL, if o_nz is used to specify the nonzero
4183            structure. The size of this array is equal to the number
4184            of local rows, i.e 'm'.
4185 
4186    Output Parameter:
4187 .  A - the matrix
4188 
4189    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4190    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4191    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4192 
4193    Notes:
4194    If the *_nnz parameter is given then the *_nz parameter is ignored
4195 
4196    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4197    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4198    storage requirements for this matrix.
4199 
4200    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4201    processor then it must be used on all processors that share the object for
4202    that argument.
4203 
4204    The user MUST specify either the local or global matrix dimensions
4205    (possibly both).
4206 
4207    The parallel matrix is partitioned across processors such that the
4208    first m0 rows belong to process 0, the next m1 rows belong to
4209    process 1, the next m2 rows belong to process 2 etc.. where
4210    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4211    values corresponding to [m x N] submatrix.
4212 
4213    The columns are logically partitioned with the n0 columns belonging
4214    to 0th partition, the next n1 columns belonging to the next
4215    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4216 
4217    The DIAGONAL portion of the local submatrix on any given processor
4218    is the submatrix corresponding to the rows and columns m,n
4219    of the given processor, i.e. the diagonal matrix on
4220    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4221    etc. The remaining portion of the local submatrix [m x (N-n)]
4222    constitutes the OFF-DIAGONAL portion. The example below better
4223    illustrates this concept.
4224 
4225    For a square global matrix we define each processor's diagonal portion
4226    to be its local rows and the corresponding columns (a square submatrix);
4227    each processor's off-diagonal portion encompasses the remainder of the
4228    local matrix (a rectangular submatrix).
4229 
4230    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4231 
4232    When calling this routine with a single process communicator, a matrix of
4233    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4234    type of communicator, use the construction mechanism
4235 .vb
4236      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4237 .ve
4238 
4239 $     MatCreate(...,&A);
4240 $     MatSetType(A,MATMPIAIJ);
4241 $     MatSetSizes(A, m,n,M,N);
4242 $     MatMPIAIJSetPreallocation(A,...);
4243 
4244    By default, this format uses inodes (identical nodes) when possible.
4245    We search for consecutive rows with the same nonzero structure, thereby
4246    reusing matrix information to achieve increased efficiency.
4247 
4248    Options Database Keys:
4249 +  -mat_no_inode  - Do not use inodes
4250 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4251 
4252 
4253 
4254    Example usage:
4255 
4256    Consider the following 8x8 matrix with 34 non-zero values, that is
4257    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4258    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4259    as follows
4260 
4261 .vb
4262             1  2  0  |  0  3  0  |  0  4
4263     Proc0   0  5  6  |  7  0  0  |  8  0
4264             9  0 10  | 11  0  0  | 12  0
4265     -------------------------------------
4266            13  0 14  | 15 16 17  |  0  0
4267     Proc1   0 18  0  | 19 20 21  |  0  0
4268             0  0  0  | 22 23  0  | 24  0
4269     -------------------------------------
4270     Proc2  25 26 27  |  0  0 28  | 29  0
4271            30  0  0  | 31 32 33  |  0 34
4272 .ve
4273 
4274    This can be represented as a collection of submatrices as
4275 
4276 .vb
4277       A B C
4278       D E F
4279       G H I
4280 .ve
4281 
4282    Where the submatrices A,B,C are owned by proc0, D,E,F are
4283    owned by proc1, G,H,I are owned by proc2.
4284 
4285    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4286    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4287    The 'M','N' parameters are 8,8, and have the same values on all procs.
4288 
4289    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4290    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4291    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4292    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4293    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4294    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4295    matrix, and [DF] as another SeqAIJ matrix.
4296    When d_nz, o_nz parameters are specified, d_nz storage elements are
4297    allocated for every row of the local diagonal submatrix, and o_nz
4298    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4299    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4300    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4301    In this case, the values of d_nz,o_nz are
4302 .vb
4303      proc0 : dnz = 2, o_nz = 2
4304      proc1 : dnz = 3, o_nz = 2
4305      proc2 : dnz = 1, o_nz = 4
4306 .ve
4307    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4308    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4309    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4310    34 values.
4311 
4312    When d_nnz, o_nnz parameters are specified, the storage is specified
4313    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4314    In the above case the values for d_nnz,o_nnz are
4315 .vb
4316      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4317      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4318      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4319 .ve
4320    Here the space allocated is the sum of all the above values, i.e. 34, and
4321    hence the preallocation is perfect.
4322 
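   As a sketch (comm follows the usual PETSc conventions and is assumed here), process 1 in the
   example above could create and preallocate its share of the 8x8 matrix in one call:

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   where the per-row counts are the proc1 values listed above.
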
4323    Level: intermediate
4324 
4325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4326           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4327 @*/
4328 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4329 {
4330   PetscErrorCode ierr;
4331   PetscMPIInt    size;
4332 
4333   PetscFunctionBegin;
4334   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4335   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4336   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4337   if (size > 1) {
4338     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4339     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4340   } else {
4341     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4342     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4343   }
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 /*@C
4348   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4349 
4350   Not collective
4351 
4352   Input Parameter:
4353 . A - The MPIAIJ matrix
4354 
4355   Output Parameters:
4356 + Ad - The local diagonal block as a SeqAIJ matrix
4357 . Ao - The local off-diagonal block as a SeqAIJ matrix
4358 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4359 
4360   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4361   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4362   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4363   local column numbers to global column numbers in the original matrix.
4364 
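  As a sketch (A is assumed to be an assembled MATMPIAIJ matrix), colmap lets one translate a
  local column index of Ao back to a global column index:

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve
  Column jloc of Ao then corresponds to global column colmap[jloc] of A.
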
4365   Level: intermediate
4366 
4367 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4368 @*/
4369 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4370 {
4371   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4372   PetscBool      flg;
4373   PetscErrorCode ierr;
4374 
4375   PetscFunctionBegin;
4376   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4377   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4378   if (Ad)     *Ad     = a->A;
4379   if (Ao)     *Ao     = a->B;
4380   if (colmap) *colmap = a->garray;
4381   PetscFunctionReturn(0);
4382 }
4383 
4384 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4385 {
4386   PetscErrorCode ierr;
4387   PetscInt       m,N,i,rstart,nnz,Ii;
4388   PetscInt       *indx;
4389   PetscScalar    *values;
4390 
4391   PetscFunctionBegin;
4392   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4393   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4394     PetscInt       *dnz,*onz,sum,bs,cbs;
4395 
4396     if (n == PETSC_DECIDE) {
4397       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4398     }
4399     /* Check sum(n) = N */
4400     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4401     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4402 
4403     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4404     rstart -= m;
4405 
4406     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4407     for (i=0; i<m; i++) {
4408       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4409       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4410       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4411     }
4412 
4413     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4414     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4415     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4416     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4417     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4418     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4419     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4420     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4421   }
4422 
4423   /* numeric phase */
4424   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4425   for (i=0; i<m; i++) {
4426     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4427     Ii   = i + rstart;
4428     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4429     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4430   }
4431   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4432   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4437 {
4438   PetscErrorCode    ierr;
4439   PetscMPIInt       rank;
4440   PetscInt          m,N,i,rstart,nnz;
4441   size_t            len;
4442   const PetscInt    *indx;
4443   PetscViewer       out;
4444   char              *name;
4445   Mat               B;
4446   const PetscScalar *values;
4447 
4448   PetscFunctionBegin;
4449   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4450   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4451   /* Should this be the type of the diagonal block of A? */
4452   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4453   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4454   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4455   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4456   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4457   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4458   for (i=0; i<m; i++) {
4459     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4460     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4461     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4462   }
4463   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4464   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4465 
4466   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4467   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4468   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4469   sprintf(name,"%s.%d",outfile,rank);
4470   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4471   ierr = PetscFree(name);CHKERRQ(ierr);
4472   ierr = MatView(B,out);CHKERRQ(ierr);
4473   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4474   ierr = MatDestroy(&B);CHKERRQ(ierr);
4475   PetscFunctionReturn(0);
4476 }
4477 
4478 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4479 {
4480   PetscErrorCode      ierr;
4481   Mat_Merge_SeqsToMPI *merge;
4482   PetscContainer      container;
4483 
4484   PetscFunctionBegin;
4485   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4486   if (container) {
4487     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4500     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4501     ierr = PetscFree(merge);CHKERRQ(ierr);
4502     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4503   }
4504   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 #include <../src/mat/utils/freespace.h>
4509 #include <petscbt.h>
4510 
4511 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4512 {
4513   PetscErrorCode      ierr;
4514   MPI_Comm            comm;
4515   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4516   PetscMPIInt         size,rank,taga,*len_s;
4517   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4518   PetscInt            proc,m;
4519   PetscInt            **buf_ri,**buf_rj;
4520   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4521   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4522   MPI_Request         *s_waits,*r_waits;
4523   MPI_Status          *status;
4524   MatScalar           *aa=a->a;
4525   MatScalar           **abuf_r,*ba_i;
4526   Mat_Merge_SeqsToMPI *merge;
4527   PetscContainer      container;
4528 
4529   PetscFunctionBegin;
4530   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4531   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4532 
4533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4534   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4535 
4536   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4537   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4538 
4539   bi     = merge->bi;
4540   bj     = merge->bj;
4541   buf_ri = merge->buf_ri;
4542   buf_rj = merge->buf_rj;
4543 
4544   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4545   owners = merge->rowmap->range;
4546   len_s  = merge->len_s;
4547 
4548   /* send and recv matrix values */
4549   /*-----------------------------*/
4550   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4551   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4552 
4553   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4554   for (proc=0,k=0; proc<size; proc++) {
4555     if (!len_s[proc]) continue;
4556     i    = owners[proc];
4557     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4558     k++;
4559   }
4560 
4561   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4562   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4563   ierr = PetscFree(status);CHKERRQ(ierr);
4564 
4565   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4566   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4567 
4568   /* insert mat values of mpimat */
4569   /*----------------------------*/
4570   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4571   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4572 
4573   for (k=0; k<merge->nrecv; k++) {
4574     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4575     nrows       = *(buf_ri_k[k]);
4576     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4577     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4578   }
4579 
4580   /* set values of ba */
4581   m = merge->rowmap->n;
4582   for (i=0; i<m; i++) {
4583     arow = owners[rank] + i;
4584     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4585     bnzi = bi[i+1] - bi[i];
4586     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4587 
4588     /* add local non-zero vals of this proc's seqmat into ba */
4589     anzi   = ai[arow+1] - ai[arow];
4590     aj     = a->j + ai[arow];
4591     aa     = a->a + ai[arow];
4592     nextaj = 0;
4593     for (j=0; nextaj<anzi; j++) {
4594       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4595         ba_i[j] += aa[nextaj++];
4596       }
4597     }
4598 
4599     /* add received vals into ba */
4600     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4601       /* i-th row */
4602       if (i == *nextrow[k]) {
4603         anzi   = *(nextai[k]+1) - *nextai[k];
4604         aj     = buf_rj[k] + *(nextai[k]);
4605         aa     = abuf_r[k] + *(nextai[k]);
4606         nextaj = 0;
4607         for (j=0; nextaj<anzi; j++) {
4608           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4609             ba_i[j] += aa[nextaj++];
4610           }
4611         }
4612         nextrow[k]++; nextai[k]++;
4613       }
4614     }
4615     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4616   }
4617   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4618   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4619 
4620   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4621   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4622   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4623   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4624   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4625   PetscFunctionReturn(0);
4626 }
4627 
4628 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4629 {
4630   PetscErrorCode      ierr;
4631   Mat                 B_mpi;
4632   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4633   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4634   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4635   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4636   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4637   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4638   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4639   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4640   MPI_Status          *status;
4641   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4642   PetscBT             lnkbt;
4643   Mat_Merge_SeqsToMPI *merge;
4644   PetscContainer      container;
4645 
4646   PetscFunctionBegin;
4647   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4648 
4649   /* make sure it is a PETSc comm */
4650   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4652   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4653 
4654   ierr = PetscNew(&merge);CHKERRQ(ierr);
4655   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4656 
4657   /* determine row ownership */
4658   /*---------------------------------------------------------*/
4659   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4660   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4661   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4662   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4664   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4665   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4666 
4667   m      = merge->rowmap->n;
4668   owners = merge->rowmap->range;
4669 
4670   /* determine the number of messages to send, their lengths */
4671   /*---------------------------------------------------------*/
4672   len_s = merge->len_s;
4673 
4674   len          = 0; /* length of buf_si[] */
4675   merge->nsend = 0;
4676   for (proc=0; proc<size; proc++) {
4677     len_si[proc] = 0;
4678     if (proc == rank) {
4679       len_s[proc] = 0;
4680     } else {
4681       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4682       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows owned by [proc] that will be sent to [proc] */
4683     }
4684     if (len_s[proc]) {
4685       merge->nsend++;
4686       nrows = 0;
4687       for (i=owners[proc]; i<owners[proc+1]; i++) {
4688         if (ai[i+1] > ai[i]) nrows++;
4689       }
4690       len_si[proc] = 2*(nrows+1);
4691       len         += len_si[proc];
4692     }
4693   }
4694 
4695   /* determine the number and length of messages to receive for ij-structure */
4696   /*-------------------------------------------------------------------------*/
4697   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4698   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4699 
4700   /* post the Irecv of j-structure */
4701   /*-------------------------------*/
4702   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4703   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4704 
4705   /* post the Isend of j-structure */
4706   /*--------------------------------*/
4707   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4708 
4709   for (proc=0, k=0; proc<size; proc++) {
4710     if (!len_s[proc]) continue;
4711     i    = owners[proc];
4712     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4713     k++;
4714   }
4715 
4716   /* receives and sends of j-structure are complete */
4717   /*------------------------------------------------*/
4718   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4719   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4720 
4721   /* send and recv i-structure */
4722   /*---------------------------*/
4723   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4724   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4725 
4726   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4727   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4728   for (proc=0,k=0; proc<size; proc++) {
4729     if (!len_s[proc]) continue;
4730     /* form outgoing message for i-structure:
4731          buf_si[0]:                 nrows to be sent
4732                [1:nrows]:           row index (global)
4733                [nrows+1:2*nrows+1]: i-structure index
4734     */
4735     /*-------------------------------------------*/
4736     nrows       = len_si[proc]/2 - 1;
4737     buf_si_i    = buf_si + nrows+1;
4738     buf_si[0]   = nrows;
4739     buf_si_i[0] = 0;
4740     nrows       = 0;
4741     for (i=owners[proc]; i<owners[proc+1]; i++) {
4742       anzi = ai[i+1] - ai[i];
4743       if (anzi) {
4744         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4745         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4746         nrows++;
4747       }
4748     }
4749     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4750     k++;
4751     buf_si += len_si[proc];
4752   }
4753 
4754   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4755   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4756 
4757   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4758   for (i=0; i<merge->nrecv; i++) {
4759     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4760   }
4761 
4762   ierr = PetscFree(len_si);CHKERRQ(ierr);
4763   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4764   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4765   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4766   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4767   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4768   ierr = PetscFree(status);CHKERRQ(ierr);
4769 
4770   /* compute a local seq matrix in each processor */
4771   /*----------------------------------------------*/
4772   /* allocate bi array and free space for accumulating nonzero column info */
4773   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4774   bi[0] = 0;
4775 
4776   /* create and initialize a linked list */
4777   nlnk = N+1;
4778   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4779 
4780   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4781   len  = ai[owners[rank+1]] - ai[owners[rank]];
4782   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4783 
4784   current_space = free_space;
4785 
4786   /* determine symbolic info for each local row */
4787   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4788 
4789   for (k=0; k<merge->nrecv; k++) {
4790     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4791     nrows       = *buf_ri_k[k];
4792     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4793     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4794   }
4795 
4796   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4797   len  = 0;
4798   for (i=0; i<m; i++) {
4799     bnzi = 0;
4800     /* add local non-zero cols of this proc's seqmat into lnk */
4801     arow  = owners[rank] + i;
4802     anzi  = ai[arow+1] - ai[arow];
4803     aj    = a->j + ai[arow];
4804     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4805     bnzi += nlnk;
4806     /* add received col data into lnk */
4807     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4808       if (i == *nextrow[k]) { /* i-th row */
4809         anzi  = *(nextai[k]+1) - *nextai[k];
4810         aj    = buf_rj[k] + *nextai[k];
4811         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4812         bnzi += nlnk;
4813         nextrow[k]++; nextai[k]++;
4814       }
4815     }
4816     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4817 
4818     /* if free space is not available, make more free space */
4819     if (current_space->local_remaining<bnzi) {
4820       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4821       nspacedouble++;
4822     }
4823     /* copy data into free space, then initialize lnk */
4824     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4825     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4826 
4827     current_space->array           += bnzi;
4828     current_space->local_used      += bnzi;
4829     current_space->local_remaining -= bnzi;
4830 
4831     bi[i+1] = bi[i] + bnzi;
4832   }
4833 
4834   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4835 
4836   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4837   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4838   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4839 
4840   /* create symbolic parallel matrix B_mpi */
4841   /*---------------------------------------*/
4842   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4843   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4844   if (n==PETSC_DECIDE) {
4845     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4846   } else {
4847     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4848   }
4849   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4850   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4851   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4852   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4853   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4854 
4855   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4856   B_mpi->assembled    = PETSC_FALSE;
4857   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4858   merge->bi           = bi;
4859   merge->bj           = bj;
4860   merge->buf_ri       = buf_ri;
4861   merge->buf_rj       = buf_rj;
4862   merge->coi          = NULL;
4863   merge->coj          = NULL;
4864   merge->owners_co    = NULL;
4865 
4866   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4867 
4868   /* attach the supporting struct to B_mpi for reuse */
4869   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4870   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4871   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4872   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4873   *mpimat = B_mpi;
4874 
4875   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4876   PetscFunctionReturn(0);
4877 }
4878 
4879 /*@C
4880       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4881                  matrices from each processor
4882 
4883     Collective
4884 
4885    Input Parameters:
4886 +    comm - the communicator the parallel matrix will live on
4887 .    seqmat - the input sequential matrix
4888 .    m - number of local rows (or PETSC_DECIDE)
4889 .    n - number of local columns (or PETSC_DECIDE)
4890 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4891 
4892    Output Parameter:
4893 .    mpimat - the parallel matrix generated
4894 
4895     Level: advanced
4896 
4897    Notes:
4898      The dimensions of the sequential matrix in each processor MUST be the same.
4899      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4900      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
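
     A sketch of the typical call sequence (seqmat is assumed to be a SEQAIJ matrix with the same
     dimensions on every process):

.vb
     Mat C;

     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve
     where the second call is made after the numerical values of seqmat have changed (with the same
     nonzero pattern), so only the numeric phase is repeated.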
4901 @*/
4902 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4903 {
4904   PetscErrorCode ierr;
4905   PetscMPIInt    size;
4906 
4907   PetscFunctionBegin;
4908   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4909   if (size == 1) {
4910     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4911     if (scall == MAT_INITIAL_MATRIX) {
4912       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4913     } else {
4914       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4915     }
4916     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4917     PetscFunctionReturn(0);
4918   }
4919   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920   if (scall == MAT_INITIAL_MATRIX) {
4921     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4922   }
4923   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4924   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 /*@
4929      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4930           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4931           with MatGetSize()
4932 
4933     Not Collective
4934 
4935    Input Parameters:
4936 +    A - the matrix
4937 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4938 
4939    Output Parameter:
4940 .    A_loc - the local sequential matrix generated
4941 
4942     Level: developer
4943 
4944    Notes:
4945      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4946      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4947      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4948      modify the values of the returned A_loc.
4949 
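     A sketch of obtaining the local rows and reusing the result (A is assumed to be an assembled
     MATMPIAIJ matrix whose values, but not nonzero pattern, change between the two calls):

.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
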
4950 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4951 
4952 @*/
4953 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4954 {
4955   PetscErrorCode ierr;
4956   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4957   Mat_SeqAIJ     *mat,*a,*b;
4958   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4959   MatScalar      *aa,*ba,*cam;
4960   PetscScalar    *ca;
4961   PetscMPIInt    size;
4962   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4963   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4964   PetscBool      match;
4965 
4966   PetscFunctionBegin;
4967   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
4968   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4969   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
4970   if (size == 1) {
4971     if (scall == MAT_INITIAL_MATRIX) {
4972       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
4973       *A_loc = mpimat->A;
4974     } else if (scall == MAT_REUSE_MATRIX) {
4975       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4976     }
4977     PetscFunctionReturn(0);
4978   }
4979 
4980   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4981   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4982   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4983   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4984   aa = a->a; ba = b->a;
4985   if (scall == MAT_INITIAL_MATRIX) {
4986     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4987     ci[0] = 0;
4988     for (i=0; i<am; i++) {
4989       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4990     }
4991     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4992     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4993     k    = 0;
4994     for (i=0; i<am; i++) {
4995       ncols_o = bi[i+1] - bi[i];
4996       ncols_d = ai[i+1] - ai[i];
4997       /* off-diagonal portion of A */
4998       for (jo=0; jo<ncols_o; jo++) {
4999         col = cmap[*bj];
5000         if (col >= cstart) break;
5001         cj[k]   = col; bj++;
5002         ca[k++] = *ba++;
5003       }
5004       /* diagonal portion of A */
5005       for (j=0; j<ncols_d; j++) {
5006         cj[k]   = cstart + *aj++;
5007         ca[k++] = *aa++;
5008       }
5009       /* off-diagonal portion of A */
5010       for (j=jo; j<ncols_o; j++) {
5011         cj[k]   = cmap[*bj++];
5012         ca[k++] = *ba++;
5013       }
5014     }
5015     /* put together the new matrix */
5016     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5017     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5018     /* Since these are PETSc arrays, change flags to free them as necessary. */
5019     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5020     mat->free_a  = PETSC_TRUE;
5021     mat->free_ij = PETSC_TRUE;
5022     mat->nonew   = 0;
5023   } else if (scall == MAT_REUSE_MATRIX) {
5024     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5025     ci = mat->i; cj = mat->j; cam = mat->a;
5026     for (i=0; i<am; i++) {
5027       /* off-diagonal portion of A */
5028       ncols_o = bi[i+1] - bi[i];
5029       for (jo=0; jo<ncols_o; jo++) {
5030         col = cmap[*bj];
5031         if (col >= cstart) break;
5032         *cam++ = *ba++; bj++;
5033       }
5034       /* diagonal portion of A */
5035       ncols_d = ai[i+1] - ai[i];
5036       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5037       /* off-diagonal portion of A */
5038       for (j=jo; j<ncols_o; j++) {
5039         *cam++ = *ba++; bj++;
5040       }
5041     }
5042   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5043   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5044   PetscFunctionReturn(0);
5045 }
5046 
5047 /*@C
5048      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and NON-ZERO columns
5049 
5050     Not Collective
5051 
5052    Input Parameters:
5053 +    A - the matrix
5054 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5055 -    row, col - index sets of rows and columns to extract (or NULL)
5056 
5057    Output Parameter:
5058 .    A_loc - the local sequential matrix generated
5059 
5060     Level: developer
5061 
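   Example usage (a minimal sketch; row and col are passed as NULL so the routine builds the condensed column set itself):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... work with the condensed local matrix ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
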
5062 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5063 
5064 @*/
5065 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5066 {
5067   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5068   PetscErrorCode ierr;
5069   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5070   IS             isrowa,iscola;
5071   Mat            *aloc;
5072   PetscBool      match;
5073 
5074   PetscFunctionBegin;
5075   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5076   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5077   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5078   if (!row) {
5079     start = A->rmap->rstart; end = A->rmap->rend;
5080     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5081   } else {
5082     isrowa = *row;
5083   }
5084   if (!col) {
5085     start = A->cmap->rstart;
5086     cmap  = a->garray;
5087     nzA   = a->A->cmap->n;
5088     nzB   = a->B->cmap->n;
5089     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5090     ncols = 0;
5091     for (i=0; i<nzB; i++) {
5092       if (cmap[i] < start) idx[ncols++] = cmap[i];
5093       else break;
5094     }
5095     imark = i;
5096     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5097     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5098     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5099   } else {
5100     iscola = *col;
5101   }
5102   if (scall != MAT_INITIAL_MATRIX) {
5103     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5104     aloc[0] = *A_loc;
5105   }
5106   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5107   if (!col) { /* attach global id of condensed columns */
5108     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5109   }
5110   *A_loc = aloc[0];
5111   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5112   if (!row) {
5113     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5114   }
5115   if (!col) {
5116     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5117   }
5118   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5119   PetscFunctionReturn(0);
5120 }
5121 
5122 /*
5123  * Destroy a mat that may be composed with PetscSF communication objects.
5124  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5125  * */
5126 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5127 {
5128   PetscSF          sf,osf;
5129   IS               map;
5130   PetscErrorCode   ierr;
5131 
5132   PetscFunctionBegin;
5133   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5134   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5135   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5136   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5137   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5138   ierr = ISDestroy(&map);CHKERRQ(ierr);
5139   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5140   PetscFunctionReturn(0);
5141 }
5142 
5143 /*
5144  * Create a sequential AIJ matrix based on row indices; the entire row is extracted once a row index is matched.
5145  * Rows can be local or remote. The routine is designed to be memory scalable so that nothing depends
5146  * on a global size.
5147  * */
5148 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5149 {
5150   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5151   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5152   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5153   PetscMPIInt              owner;
5154   PetscSFNode              *iremote,*oiremote;
5155   const PetscInt           *lrowindices;
5156   PetscErrorCode           ierr;
5157   PetscSF                  sf,osf;
5158   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5159   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5160   MPI_Comm                 comm;
5161   ISLocalToGlobalMapping   mapping;
5162 
5163   PetscFunctionBegin;
5164   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5165   /* plocalsize is the number of roots
5166    * nrows is the number of leaves
5167    * */
5168   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5169   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5170   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5171   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5172   for (i=0;i<nrows;i++) {
5173     /* Find a remote index and an owner for a row
5174      * The row could be local or remote
5175      * */
5176     owner = 0;
5177     lidx  = 0;
5178     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5179     iremote[i].index = lidx;
5180     iremote[i].rank  = owner;
5181   }
5182   /* Create SF to communicate how many nonzero columns for each row */
5183   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5184   /* SF will figure out the number of nonzero columns for each row, and their
5185    * offsets
5186    * */
5187   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5188   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5189   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5190 
5191   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5192   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5193   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5194   roffsets[0] = 0;
5195   roffsets[1] = 0;
5196   for (i=0;i<plocalsize;i++) {
5197     /* diag */
5198     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5199     /* off diag */
5200     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5201     /* compute offsets so that we know the relative location of each row */
5202     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5203     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5204   }
5205   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5206   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5207   /* 'r' means root, and 'l' means leaf */
5208   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5209   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5210   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5211   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5212   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5213   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5214   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5215   dntotalcols = 0;
5216   ontotalcols = 0;
5217   ncol = 0;
5218   for (i=0;i<nrows;i++) {
5219     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5220     ncol = PetscMax(pnnz[i],ncol);
5221     /* diag */
5222     dntotalcols += nlcols[i*2+0];
5223     /* off diag */
5224     ontotalcols += nlcols[i*2+1];
5225   }
5226   /* We do not need to figure out the exact number of columns
5227    * since all the calculations are done by going through the raw data
5228    * */
5229   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5230   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5231   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5232   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5233   /* diag */
5234   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5235   /* off diag */
5236   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5237   /* diag */
5238   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5239   /* off diag */
5240   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5241   dntotalcols = 0;
5242   ontotalcols = 0;
5243   ntotalcols  = 0;
5244   for (i=0;i<nrows;i++) {
5245     owner = 0;
5246     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5247     /* Set iremote for diag matrix */
5248     for (j=0;j<nlcols[i*2+0];j++) {
5249       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5250       iremote[dntotalcols].rank    = owner;
5251       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5252       ilocal[dntotalcols++]        = ntotalcols++;
5253     }
5254     /* off diag */
5255     for (j=0;j<nlcols[i*2+1];j++) {
5256       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5257       oiremote[ontotalcols].rank    = owner;
5258       oilocal[ontotalcols++]        = ntotalcols++;
5259     }
5260   }
5261   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5262   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5263   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5264   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5265   /* P serves as the roots and P_oth as the leaves
5266    * Diagonal matrix
5267    * */
5268   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5269   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5270   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5271 
5272   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5273   /* Off diag */
5274   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5275   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5276   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5277   /* We operate on the matrix internal data for saving memory */
5278   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5279   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5280   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5281   /* Convert to global indices for diag matrix */
5282   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5283   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5284   /* We want P_oth to store global indices */
5285   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5286   /* Use memory scalable approach */
5287   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5288   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5289   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5290   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5291   /* Convert back to local indices */
5292   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5293   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5294   nout = 0;
5295   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5296   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5297   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5298   /* Exchange values */
5299   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5300   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5301   /* Stop PETSc from shrinking memory */
5302   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5303   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5304   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5305   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5306   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5307   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5308   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5309   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 /*
5314  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5315  * This supports MPIAIJ and MAIJ matrices
5316  * */
5317 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5318 {
5319   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5320   Mat_SeqAIJ            *p_oth;
5321   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5322   IS                    rows,map;
5323   PetscHMapI            hamp;
5324   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5325   MPI_Comm              comm;
5326   PetscSF               sf,osf;
5327   PetscBool             has;
5328   PetscErrorCode        ierr;
5329 
5330   PetscFunctionBegin;
5331   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5332   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5333   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5334    *  and then create a submatrix (that often is an overlapping matrix)
5335    * */
5336   if (reuse==MAT_INITIAL_MATRIX) {
5337     /* Use a hash table to figure out unique keys */
5338     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5339     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5340     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5341     count = 0;
5342     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5343     for (i=0;i<a->B->cmap->n;i++) {
5344       key  = a->garray[i]/dof;
5345       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5346       if (!has) {
5347         mapping[i] = count;
5348         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5349       } else {
5350         /* Current 'i' maps to the same key as the previous entry */
5351         mapping[i] = count-1;
5352       }
5353     }
5354     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5355     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5356     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5357     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5358     off = 0;
5359     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5360     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5361     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5362     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5363     /* In case the matrix was already created and the user wants to recreate it */
5364     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5365     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5366     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5367     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5368   } else if (reuse==MAT_REUSE_MATRIX) {
5369     /* If the matrix was already created, we simply update the values using the SF objects
5370      * that were attached to the matrix earlier.
5371      * */
5372     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5373     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5374     if (!sf || !osf) {
5375       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5376     }
5377     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5378     /* Update values in place */
5379     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5380     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5381     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5382     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5383   } else {
5384     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5385   }
5386   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5387   PetscFunctionReturn(0);
5388 }
5389 
5390 /*@C
5391     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5392 
5393     Collective on Mat
5394 
5395    Input Parameters:
5396 +    A,B - the matrices in mpiaij format
5397 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5398 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5399 
5400    Output Parameter:
5401 +    rowb, colb - index sets of rows and columns of B to extract
5402 -    B_seq - the sequential matrix generated
5403 
5404     Level: developer
5405 
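   Example usage (a minimal sketch; the index sets created by the first call are passed back unchanged for the reuse call):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... the values of B change, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
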
5406 @*/
5407 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5408 {
5409   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5410   PetscErrorCode ierr;
5411   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5412   IS             isrowb,iscolb;
5413   Mat            *bseq=NULL;
5414 
5415   PetscFunctionBegin;
5416   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5417     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5418   }
5419   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5420 
5421   if (scall == MAT_INITIAL_MATRIX) {
5422     start = A->cmap->rstart;
5423     cmap  = a->garray;
5424     nzA   = a->A->cmap->n;
5425     nzB   = a->B->cmap->n;
5426     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5427     ncols = 0;
5428     for (i=0; i<nzB; i++) {  /* row < local row index */
5429       if (cmap[i] < start) idx[ncols++] = cmap[i];
5430       else break;
5431     }
5432     imark = i;
5433     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5434     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5435     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5436     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5437   } else {
5438     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5439     isrowb  = *rowb; iscolb = *colb;
5440     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5441     bseq[0] = *B_seq;
5442   }
5443   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5444   *B_seq = bseq[0];
5445   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5446   if (!rowb) {
5447     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5448   } else {
5449     *rowb = isrowb;
5450   }
5451   if (!colb) {
5452     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5453   } else {
5454     *colb = iscolb;
5455   }
5456   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5457   PetscFunctionReturn(0);
5458 }
5459 
5460 /*
5461     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5462     of the OFF-DIAGONAL portion of the local A
5463 
5464     Collective on Mat
5465 
5466    Input Parameters:
5467 +    A,B - the matrices in mpiaij format
5468 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5469 
5470    Output Parameter:
5471 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5472 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5473 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5474 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5475 
5476     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5477      for this matrix. This is not desirable.
5478 
5479     Level: developer
5480 
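    Example usage (a sketch; the caller keeps startsj_s, startsj_r and bufa between calls so that
    MAT_REUSE_MATRIX can skip the symbolic communication):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... the values of B change, same nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
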
5481 */
5482 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5483 {
5484   PetscErrorCode         ierr;
5485   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5486   Mat_SeqAIJ             *b_oth;
5487   VecScatter             ctx;
5488   MPI_Comm               comm;
5489   const PetscMPIInt      *rprocs,*sprocs;
5490   const PetscInt         *srow,*rstarts,*sstarts;
5491   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5492   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5493   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5494   MPI_Request            *rwaits = NULL,*swaits = NULL;
5495   MPI_Status             rstatus;
5496   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5497 
5498   PetscFunctionBegin;
5499   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5500   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5501 
5502   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5503     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5504   }
5505   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5506   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5507 
5508   if (size == 1) {
5509     startsj_s = NULL;
5510     bufa_ptr  = NULL;
5511     *B_oth    = NULL;
5512     PetscFunctionReturn(0);
5513   }
5514 
5515   ctx = a->Mvctx;
5516   tag = ((PetscObject)ctx)->tag;
5517 
5518   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5519   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5520   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5521   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5522   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5523   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5524   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5525 
5526   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5527   if (scall == MAT_INITIAL_MATRIX) {
5528     /* i-array */
5529     /*---------*/
5530     /*  post receives */
5531     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5532     for (i=0; i<nrecvs; i++) {
5533       rowlen = rvalues + rstarts[i]*rbs;
5534       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5535       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5536     }
5537 
5538     /* pack the outgoing message */
5539     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5540 
5541     sstartsj[0] = 0;
5542     rstartsj[0] = 0;
5543     len         = 0; /* total length of j or a array to be sent */
5544     if (nsends) {
5545       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5546       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5547     }
5548     for (i=0; i<nsends; i++) {
5549       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5550       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5551       for (j=0; j<nrows; j++) {
5552         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5553         for (l=0; l<sbs; l++) {
5554           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5555 
5556           rowlen[j*sbs+l] = ncols;
5557 
5558           len += ncols;
5559           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5560         }
5561         k++;
5562       }
5563       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5564 
5565       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5566     }
5567     /* recvs and sends of i-array are completed */
5568     i = nrecvs;
5569     while (i--) {
5570       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5571     }
5572     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5573     ierr = PetscFree(svalues);CHKERRQ(ierr);
5574 
5575     /* allocate buffers for sending j and a arrays */
5576     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5577     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5578 
5579     /* create i-array of B_oth */
5580     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5581 
5582     b_othi[0] = 0;
5583     len       = 0; /* total length of j or a array to be received */
5584     k         = 0;
5585     for (i=0; i<nrecvs; i++) {
5586       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5587       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5588       for (j=0; j<nrows; j++) {
5589         b_othi[k+1] = b_othi[k] + rowlen[j];
5590         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5591         k++;
5592       }
5593       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5594     }
5595     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5596 
5597     /* allocate space for the j and a arrays of B_oth */
5598     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5599     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5600 
5601     /* j-array */
5602     /*---------*/
5603     /*  post receives of j-array */
5604     for (i=0; i<nrecvs; i++) {
5605       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5606       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5607     }
5608 
5609     /* pack the outgoing message j-array */
5610     if (nsends) k = sstarts[0];
5611     for (i=0; i<nsends; i++) {
5612       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5613       bufJ  = bufj+sstartsj[i];
5614       for (j=0; j<nrows; j++) {
5615         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5616         for (ll=0; ll<sbs; ll++) {
5617           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5618           for (l=0; l<ncols; l++) {
5619             *bufJ++ = cols[l];
5620           }
5621           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5622         }
5623       }
5624       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5625     }
5626 
5627     /* recvs and sends of j-array are completed */
5628     i = nrecvs;
5629     while (i--) {
5630       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5631     }
5632     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5633   } else if (scall == MAT_REUSE_MATRIX) {
5634     sstartsj = *startsj_s;
5635     rstartsj = *startsj_r;
5636     bufa     = *bufa_ptr;
5637     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5638     b_otha   = b_oth->a;
5639   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5640 
5641   /* a-array */
5642   /*---------*/
5643   /*  post receives of a-array */
5644   for (i=0; i<nrecvs; i++) {
5645     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5646     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5647   }
5648 
5649   /* pack the outgoing message a-array */
5650   if (nsends) k = sstarts[0];
5651   for (i=0; i<nsends; i++) {
5652     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5653     bufA  = bufa+sstartsj[i];
5654     for (j=0; j<nrows; j++) {
5655       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5656       for (ll=0; ll<sbs; ll++) {
5657         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5658         for (l=0; l<ncols; l++) {
5659           *bufA++ = vals[l];
5660         }
5661         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5662       }
5663     }
5664     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5665   }
5666   /* recvs and sends of a-array are completed */
5667   i = nrecvs;
5668   while (i--) {
5669     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5670   }
5671   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5672   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5673 
5674   if (scall == MAT_INITIAL_MATRIX) {
5675     /* put together the new matrix */
5676     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5677 
5678     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5679     /* Since these are PETSc arrays, change flags to free them as necessary. */
5680     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5681     b_oth->free_a  = PETSC_TRUE;
5682     b_oth->free_ij = PETSC_TRUE;
5683     b_oth->nonew   = 0;
5684 
5685     ierr = PetscFree(bufj);CHKERRQ(ierr);
5686     if (!startsj_s || !bufa_ptr) {
5687       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5688       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5689     } else {
5690       *startsj_s = sstartsj;
5691       *startsj_r = rstartsj;
5692       *bufa_ptr  = bufa;
5693     }
5694   }
5695 
5696   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5697   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5698   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5699   PetscFunctionReturn(0);
5700 }
5701 
5702 /*@C
5703   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5704 
5705   Not Collective
5706 
5707   Input Parameters:
5708 . A - The matrix in mpiaij format
5709 
5710   Output Parameter:
5711 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5712 . colmap - A map from global column index to local index into lvec
5713 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5714 
5715   Level: developer
5716 
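   Example usage (a minimal sketch; the type of colmap depends on whether PETSc was configured with ctable support):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
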
5717 @*/
5718 #if defined(PETSC_USE_CTABLE)
5719 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5720 #else
5721 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5722 #endif
5723 {
5724   Mat_MPIAIJ *a;
5725 
5726   PetscFunctionBegin;
5727   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5728   PetscValidPointer(lvec, 2);
5729   PetscValidPointer(colmap, 3);
5730   PetscValidPointer(multScatter, 4);
5731   a = (Mat_MPIAIJ*) A->data;
5732   if (lvec) *lvec = a->lvec;
5733   if (colmap) *colmap = a->colmap;
5734   if (multScatter) *multScatter = a->Mvctx;
5735   PetscFunctionReturn(0);
5736 }
5737 
5738 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5739 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5740 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5741 #if defined(PETSC_HAVE_MKL_SPARSE)
5742 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5743 #endif
5744 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5745 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5746 #if defined(PETSC_HAVE_ELEMENTAL)
5747 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5748 #endif
5749 #if defined(PETSC_HAVE_HYPRE)
5750 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5751 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5752 #endif
5753 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5754 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5755 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5756 
5757 /*
5758     Computes (B'*A')' since computing B*A directly is untenable
5759 
5760                n                       p                          p
5761         (              )       (              )         (                  )
5762       m (      A       )  *  n (       B      )   =   m (         C        )
5763         (              )       (              )         (                  )
5764 
5765 */
5766 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5767 {
5768   PetscErrorCode ierr;
5769   Mat            At,Bt,Ct;
5770 
5771   PetscFunctionBegin;
5772   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5773   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5774   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5775   ierr = MatDestroy(&At);CHKERRQ(ierr);
5776   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5777   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5778   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5779   PetscFunctionReturn(0);
5780 }
5781 
5782 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5783 {
5784   PetscErrorCode ierr;
5785   PetscInt       m=A->rmap->n,n=B->cmap->n;
5786   Mat            Cmat;
5787 
5788   PetscFunctionBegin;
5789   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5790   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5791   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5792   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5793   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5794   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5795   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5796   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5797 
5798   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5799 
5800   *C = Cmat;
5801   PetscFunctionReturn(0);
5802 }
5803 
5804 /* ----------------------------------------------------------------*/
5805 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5806 {
5807   PetscErrorCode ierr;
5808 
5809   PetscFunctionBegin;
5810   if (scall == MAT_INITIAL_MATRIX) {
5811     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5812     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5813     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5814   }
5815   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5816   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5817   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5818   PetscFunctionReturn(0);
5819 }
5820 
5821 /*MC
5822    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5823 
5824    Options Database Keys:
5825 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5826 
5827    Level: beginner
5828 
5829    Notes:
5830     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5831     in this case the values associated with the rows and columns one passes in are set to zero
5832     in the matrix.
5833 
5834     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5835     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5836 
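   Example (a sketch of the two options described in the Notes above; A is an assembled MATMPIAIJ matrix, and row, ncols, cols are locally owned indices introduced only for illustration):
.vb
     insert explicit zeros at the given locations without supplying a values array:
       ierr = MatSetValues(A,1,&row,ncols,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);

     keep only the nonzero structure, ignoring values passed to MatSetValues():
       ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
.ve
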
5837 .seealso: MatCreateAIJ()
5838 M*/
5839 
5840 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5841 {
5842   Mat_MPIAIJ     *b;
5843   PetscErrorCode ierr;
5844   PetscMPIInt    size;
5845 
5846   PetscFunctionBegin;
5847   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5848 
5849   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5850   B->data       = (void*)b;
5851   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5852   B->assembled  = PETSC_FALSE;
5853   B->insertmode = NOT_SET_VALUES;
5854   b->size       = size;
5855 
5856   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5857 
5858   /* build cache for off array entries formed */
5859   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5860 
5861   b->donotstash  = PETSC_FALSE;
5862   b->colmap      = 0;
5863   b->garray      = 0;
5864   b->roworiented = PETSC_TRUE;
5865 
5866   /* stuff used for matrix vector multiply */
5867   b->lvec  = NULL;
5868   b->Mvctx = NULL;
5869 
5870   /* stuff for MatGetRow() */
5871   b->rowindices   = 0;
5872   b->rowvalues    = 0;
5873   b->getrowactive = PETSC_FALSE;
5874 
5875   /* flexible pointer used in CUSP/CUSPARSE classes */
5876   b->spptr = NULL;
5877 
5878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5880   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5881   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5888 #if defined(PETSC_HAVE_MKL_SPARSE)
5889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5890 #endif
5891   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
5893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5894 #if defined(PETSC_HAVE_ELEMENTAL)
5895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5896 #endif
5897 #if defined(PETSC_HAVE_HYPRE)
5898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5899 #endif
5900   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5901   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5902   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5903   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5904   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5905 #if defined(PETSC_HAVE_HYPRE)
5906   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5907 #endif
5908   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5909   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5910   PetscFunctionReturn(0);
5911 }
5912 
5913 /*@C
5914      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5915          and "off-diagonal" part of the matrix in CSR format.
5916 
5917    Collective
5918 
5919    Input Parameters:
5920 +  comm - MPI communicator
5921 .  m - number of local rows (Cannot be PETSC_DECIDE)
5922 .  n - This value should be the same as the local size used in creating the
5923        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5924        it calculated if N is given). For square matrices n is almost always m.
5925 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5926 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5927 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5928 .   j - column indices
5929 .   a - matrix values
5930 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5931 .   oj - column indices
5932 -   oa - matrix values
5933 
5934    Output Parameter:
5935 .   mat - the matrix
5936 
5937    Level: advanced
5938 
5939    Notes:
5940        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5941        must free the arrays once the matrix has been destroyed and not before.
5942 
5943        The i and j indices are 0 based
5944 
5945        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5946 
5947        This sets local rows and cannot be used to set off-processor values.
5948 
5949        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5950        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5951        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5952        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5953        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5954        communication if it is known that only local entries will be set.
5955 
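   Example (a minimal sketch for exactly two MPI processes building the 2x2 matrix [2 -1; -1 2]; i,j,a hold the single
   "diagonal" entry with a local column index, oi,oj,oa hold the single "off-diagonal" entry with a global column index,
   and rank is assumed to come from MPI_Comm_rank()):
.vb
     PetscInt    i[]  = {0,1}, j[]  = {0};
     PetscInt    oi[] = {0,1}, oj[1];
     PetscScalar a[]  = {2.0}, oa[] = {-1.0};
     Mat         A;

     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
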
5956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5957           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5958 @*/
5959 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5960 {
5961   PetscErrorCode ierr;
5962   Mat_MPIAIJ     *maij;
5963 
5964   PetscFunctionBegin;
5965   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5966   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5967   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5968   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5969   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5970   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5971   maij = (Mat_MPIAIJ*) (*mat)->data;
5972 
5973   (*mat)->preallocated = PETSC_TRUE;
5974 
5975   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5976   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5977 
5978   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5979   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5980 
5981   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5982   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5983   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5984   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5985 
5986   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5987   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5988   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5989   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5990   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5991   PetscFunctionReturn(0);
5992 }
5993 
5994 /*
5995     Special version for direct calls from Fortran
5996 */
5997 #include <petsc/private/fortranimpl.h>
5998 
5999 /* Change these macros so they can be used in a void function */
6000 #undef CHKERRQ
6001 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6002 #undef SETERRQ2
6003 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6004 #undef SETERRQ3
6005 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6006 #undef SETERRQ
6007 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6008 
6009 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6010 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6011 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6012 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6013 #else
6014 #endif
6015 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6016 {
6017   Mat            mat  = *mmat;
6018   PetscInt       m    = *mm, n = *mn;
6019   InsertMode     addv = *maddv;
6020   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6021   PetscScalar    value;
6022   PetscErrorCode ierr;
6023 
6024   MatCheckPreallocated(mat,1);
6025   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6026 
6027 #if defined(PETSC_USE_DEBUG)
6028   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6029 #endif
6030   {
6031     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6032     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6033     PetscBool roworiented = aij->roworiented;
6034 
6035     /* Some Variables required in the macro */
6036     Mat        A                    = aij->A;
6037     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6038     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6039     MatScalar  *aa                  = a->a;
6040     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6041     Mat        B                    = aij->B;
6042     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6043     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6044     MatScalar  *ba                  = b->a;
6045     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6046      * cannot use "#if defined" inside a macro. */
6047     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6048 
6049     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6050     PetscInt  nonew = a->nonew;
6051     MatScalar *ap1,*ap2;
6052 
6053     PetscFunctionBegin;
6054     for (i=0; i<m; i++) {
6055       if (im[i] < 0) continue;
6056 #if defined(PETSC_USE_DEBUG)
6057       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6058 #endif
6059       if (im[i] >= rstart && im[i] < rend) {
6060         row      = im[i] - rstart;
6061         lastcol1 = -1;
6062         rp1      = aj + ai[row];
6063         ap1      = aa + ai[row];
6064         rmax1    = aimax[row];
6065         nrow1    = ailen[row];
6066         low1     = 0;
6067         high1    = nrow1;
6068         lastcol2 = -1;
6069         rp2      = bj + bi[row];
6070         ap2      = ba + bi[row];
6071         rmax2    = bimax[row];
6072         nrow2    = bilen[row];
6073         low2     = 0;
6074         high2    = nrow2;
6075 
6076         for (j=0; j<n; j++) {
6077           if (roworiented) value = v[i*n+j];
6078           else value = v[i+j*m];
6079           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6080           if (in[j] >= cstart && in[j] < cend) {
6081             col = in[j] - cstart;
6082             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6083 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6084             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6085 #endif
6086           } else if (in[j] < 0) continue;
6087 #if defined(PETSC_USE_DEBUG)
6088           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6089           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6090 #endif
6091           else {
6092             if (mat->was_assembled) {
6093               if (!aij->colmap) {
6094                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6095               }
6096 #if defined(PETSC_USE_CTABLE)
6097               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6098               col--;
6099 #else
6100               col = aij->colmap[in[j]] - 1;
6101 #endif
6102               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6103                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6104                 col  =  in[j];
6105                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6106                 B        = aij->B;
6107                 b        = (Mat_SeqAIJ*)B->data;
6108                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6109                 rp2      = bj + bi[row];
6110                 ap2      = ba + bi[row];
6111                 rmax2    = bimax[row];
6112                 nrow2    = bilen[row];
6113                 low2     = 0;
6114                 high2    = nrow2;
6115                 bm       = aij->B->rmap->n;
6116                 ba       = b->a;
6117                 inserted = PETSC_FALSE;
6118               }
6119             } else col = in[j];
6120             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6121 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6122             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6123 #endif
6124           }
6125         }
6126       } else if (!aij->donotstash) {
6127         if (roworiented) {
6128           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6129         } else {
6130           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6131         }
6132       }
6133     }
6134   }
6135   PetscFunctionReturnVoid();
6136 }
6137