xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f07e67ed86ede464a9faddc80617894c11508dd6)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to using inodes when
   enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
/*
   Parameters, as used by the code below:
     comm  - communicator over which the matrix is distributed
     gmat  - source matrix; its type is checked against MATSEQAIJ and its data is read on rank 0 only
             (MatGetBlockSizes() is nevertheless called on it by every rank)
     m     - number of local rows (and local columns) this process receives
     reuse - MAT_INITIAL_MATRIX creates *inmat; any other value only refreshes the numerical values of *inmat
     inmat - the distributed matrix (output for MAT_INITIAL_MATRIX, input/output otherwise)

   Rank 0 ships row lengths, column indices and values with blocking sends that all use one tag; the other
   ranks post the matching receives in the same order, so the statement order below must not be changed.
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* the block sizes found on rank 0 overwrite whatever the other ranks obtained */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* prefix sum of the gathered local sizes: rowners[p] becomes the first row owned by rank p */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      /* ld[i] counts the entries of row i whose column is smaller than rstart */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 inserts its own rows straight from the arrays of gmat (nothing to free later) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    /* dlens temporarily holds only the diagonal-block counts (total minus off-diagonal) */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore the full row lengths, which are needed to walk the value/column arrays */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* keep ld with the matrix; the reuse branch below reads it back */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced while copying, so remember the allocation start for PetscFree() */
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /*
       The values of each row are consumed as [ld[i] values for B | values for A | remaining values for B].
       The trailing B part of row i-1 and the leading B part of row i are adjacent in both the source
       and in Ao->a, so the loop below moves them with a single copy.
    */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    /* step back to the last row to copy its trailing B part */
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
419 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order N integer array) but is fast to access.
*/
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds one value at (row,col) of the A block; row and col are
   local indices, orow/ocol are only used in the error message.

   The macro is not self-contained. It reads and updates variables of the enclosing function:
   rp1/ap1 (column indices and values of the current row), nrow1, rmax1, low1, high1, lastcol1, the scratch
   variables t, _i and N, the flags nonew, ignorezeroentries and inserted, plus A, a, am, aa, ai, aj,
   aimax, ailen and ierr.

   Steps:
     1. Reset one end of the search window [low1,high1) depending on whether col is <= or > the previously
        searched column, then bisect until the window holds at most 5 entries and scan it linearly.
     2. If col is found: add to or overwrite the value, set inserted, and jump to a_noinsert.
     3. Otherwise the entry is skipped (jump to a_noinsert) when value is 0.0, ignorezeroentries is set and
        row != col, or when nonew == 1; nonew == -1 raises PETSC_ERR_ARG_OUTOFRANGE.
     4. Otherwise MatSeqXAIJReallocateAIJ() is invoked, the later entries of the row are shifted up by one,
        the new entry is stored at position _i and A->nonzerostate is incremented.
   In all non-error cases ailen[row] is updated to nrow1 at the end.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow dow the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
485 
/*
   MatSetValues_SeqAIJ_B_Private - Counterpart of MatSetValues_SeqAIJ_A_Private for the B block.

   It works on the "2" set of variables of the enclosing function (rp2, ap2, nrow2, rmax2, low2, high2,
   lastcol2) together with B, b, bm, ba, bi, bj, bimax, bilen, and shares t, _i, N, nonew,
   ignorezeroentries, inserted and ierr with the A variant.

   The search, update, skip, error and insertion steps are the same as in the A variant, with one
   difference: a value of 0.0 is skipped whenever ignorezeroentries is set (there is no row != col
   exception). The exit label is b_noinsert and bilen[row] is updated to nrow2 at the end.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
/*
   MatSetValues_MPIAIJ - Inserts or adds an m by n block of values into an MPIAIJ matrix.

   Input Parameters:
+  mat  - the matrix
.  m,im - number of rows and their global indices; negative row indices are skipped
.  n,in - number of columns and their global indices; negative column indices are skipped
.  v    - the values, read as v[i*n+j] when aij->roworiented is set and as v[i+j*m] otherwise;
          when v is NULL the value 0.0 is used
-  addv - ADD_VALUES or INSERT_VALUES

   Notes:
   Locally owned rows are handled entry by entry: columns in [cstart,cend) go to the A block through
   MatSetValues_SeqAIJ_A_Private(), all other columns go to the B block through
   MatSetValues_SeqAIJ_B_Private(). Rows owned elsewhere are stashed unless aij->donotstash is set;
   they raise an error when mat->nooffprocentries is set.

   The long list of locals after "Some Variables required in the macro" is the state both macros operate on.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa                  = a->a;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba                  = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state of both macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* adding an off-diagonal zero changes nothing, so it is dropped when zeros are ignored */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            /* after a previous assembly the global column is translated to a local B column through colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* col < 0: the column has no entry in colmap */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new nonzeros are allowed: disassemble and continue with the global column index */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              /* new nonzeros are not allowed: nonew == 1 only logs, any other setting is an error */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* row owned by another process */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled, if so we must
857      also disassemble ourself, in order that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = NULL;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of off process part of matrix zeroing removed columns*/
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented code uses MatCreateSubMatrices instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Everyone has to call to draw the matrix since the graphics waits are
1495        synchronized across all processors that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = NULL;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1711       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = NULL; pvB = NULL;}
1874   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Sort by increasing column numbers, assuming A and B already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = NULL;
1912       if (v)   *v   = NULL;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2060   very quickly (=without using MatSetValues), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2106   }
2107   /* scale  the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may be different, hence we cannot call
2158        the MatCopy() directly on the two parts. If need be, we can provide a more
2159        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2160        then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2305 {
2306   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode    ierr;
2308   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2309   PetscScalar       *va,*vv;
2310   Vec               vB,vA;
2311   const PetscScalar *vb;
2312 
2313   PetscFunctionBegin;
2314   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2315   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2316 
2317   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2318   if (idx) {
2319     for (i=0; i<m; i++) {
2320       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2321     }
2322   }
2323 
2324   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2325   if (idx) {
2326     ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2327   }
2328   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2329 
2330   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2331   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2332   for (i=0; i<m; i++) {
2333     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2334       vv[i] = vb[i];
2335       if (idx) idx[i] = a->garray[idxb[i]];
2336     } else {
2337       vv[i] = va[i];
2338       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idx[i] > a->garray[idxb[i]])
2339         idx[i] = a->garray[idxb[i]];
2340     }
2341   }
2342   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2343   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2344   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2345   ierr = PetscFree(idxb);CHKERRQ(ierr);
2346   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2347   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2352 {
2353   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2354   PetscInt       m = A->rmap->n,n = A->cmap->n;
2355   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2356   PetscInt       *cmap  = mat->garray;
2357   PetscInt       *diagIdx, *offdiagIdx;
2358   Vec            diagV, offdiagV;
2359   PetscScalar    *a, *diagA, *offdiagA, *ba;
2360   PetscInt       r,j,col,ncols,*bi,*bj;
2361   PetscErrorCode ierr;
2362   Mat            B = mat->B;
2363   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2364 
2365   PetscFunctionBegin;
2366   /* When a process holds entire A and other processes have no entry */
2367   if (A->cmap->N == n) {
2368     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2369     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2370     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2371     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2372     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2373     PetscFunctionReturn(0);
2374   } else if (n == 0) {
2375     if (m) {
2376       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2377       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2378       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2379     }
2380     PetscFunctionReturn(0);
2381   }
2382 
2383   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2385   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2386   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2387 
2388   /* Get offdiagIdx[] for implicit 0.0 */
2389   ba = b->a;
2390   bi = b->i;
2391   bj = b->j;
2392   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2393   for (r = 0; r < m; r++) {
2394     ncols = bi[r+1] - bi[r];
2395     if (ncols == A->cmap->N - n) { /* Brow is dense */
2396       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2397     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2398       offdiagA[r] = 0.0;
2399 
2400       /* Find first hole in the cmap */
2401       for (j=0; j<ncols; j++) {
2402         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2403         if (col > j && j < cstart) {
2404           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2405           break;
2406         } else if (col > j + n && j >= cstart) {
2407           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2408           break;
2409         }
2410       }
2411       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2412         /* a hole is outside compressed Bcols */
2413         if (ncols == 0) {
2414           if (cstart) {
2415             offdiagIdx[r] = 0;
2416           } else offdiagIdx[r] = cend;
2417         } else { /* ncols > 0 */
2418           offdiagIdx[r] = cmap[ncols-1] + 1;
2419           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2420         }
2421       }
2422     }
2423 
2424     for (j=0; j<ncols; j++) {
2425       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2426       ba++; bj++;
2427     }
2428   }
2429 
2430   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2431   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2432   for (r = 0; r < m; ++r) {
2433     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2434       a[r]   = diagA[r];
2435       if (idx) idx[r] = cstart + diagIdx[r];
2436     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2437       a[r] = diagA[r];
2438       if (idx) {
2439         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2440           idx[r] = cstart + diagIdx[r];
2441         } else idx[r] = offdiagIdx[r];
2442       }
2443     } else {
2444       a[r]   = offdiagA[r];
2445       if (idx) idx[r] = offdiagIdx[r];
2446     }
2447   }
2448   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2449   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2450   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2452   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2453   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2458 {
2459   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2460   PetscInt       m = A->rmap->n,n = A->cmap->n;
2461   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2462   PetscInt       *cmap  = mat->garray;
2463   PetscInt       *diagIdx, *offdiagIdx;
2464   Vec            diagV, offdiagV;
2465   PetscScalar    *a, *diagA, *offdiagA, *ba;
2466   PetscInt       r,j,col,ncols,*bi,*bj;
2467   PetscErrorCode ierr;
2468   Mat            B = mat->B;
2469   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2470 
2471   PetscFunctionBegin;
2472   /* When a process holds entire A and other processes have no entry */
2473   if (A->cmap->N == n) {
2474     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2475     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2476     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2477     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2478     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2479     PetscFunctionReturn(0);
2480   } else if (n == 0) {
2481     if (m) {
2482       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2483       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2484       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2485     }
2486     PetscFunctionReturn(0);
2487   }
2488 
2489   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2490   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2491   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2492   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2493 
2494   /* Get offdiagIdx[] for implicit 0.0 */
2495   ba = b->a;
2496   bi = b->i;
2497   bj = b->j;
2498   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2499   for (r = 0; r < m; r++) {
2500     ncols = bi[r+1] - bi[r];
2501     if (ncols == A->cmap->N - n) { /* Brow is dense */
2502       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2503     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2504       offdiagA[r] = 0.0;
2505 
2506       /* Find first hole in the cmap */
2507       for (j=0; j<ncols; j++) {
2508         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2509         if (col > j && j < cstart) {
2510           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2511           break;
2512         } else if (col > j + n && j >= cstart) {
2513           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2514           break;
2515         }
2516       }
2517       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2518         /* a hole is outside compressed Bcols */
2519         if (ncols == 0) {
2520           if (cstart) {
2521             offdiagIdx[r] = 0;
2522           } else offdiagIdx[r] = cend;
2523         } else { /* ncols > 0 */
2524           offdiagIdx[r] = cmap[ncols-1] + 1;
2525           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2526         }
2527       }
2528     }
2529 
2530     for (j=0; j<ncols; j++) {
2531       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2532       ba++; bj++;
2533     }
2534   }
2535 
2536   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2537   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2538   for (r = 0; r < m; ++r) {
2539     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2540       a[r]   = diagA[r];
2541       if (idx) idx[r] = cstart + diagIdx[r];
2542     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2543       a[r] = diagA[r];
2544       if (idx) {
2545         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2546           idx[r] = cstart + diagIdx[r];
2547         } else idx[r] = offdiagIdx[r];
2548       }
2549     } else {
2550       a[r]   = offdiagA[r];
2551       if (idx) idx[r] = offdiagIdx[r];
2552     }
2553   }
2554   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2555   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2556   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2557   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2558   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2559   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2564 {
2565   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2566   PetscInt       m = A->rmap->n,n = A->cmap->n;
2567   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2568   PetscInt       *cmap  = mat->garray;
2569   PetscInt       *diagIdx, *offdiagIdx;
2570   Vec            diagV, offdiagV;
2571   PetscScalar    *a, *diagA, *offdiagA, *ba;
2572   PetscInt       r,j,col,ncols,*bi,*bj;
2573   PetscErrorCode ierr;
2574   Mat            B = mat->B;
2575   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2576 
2577   PetscFunctionBegin;
2578   /* When a process holds entire A and other processes have no entry */
2579   if (A->cmap->N == n) {
2580     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2581     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2582     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2583     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2584     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2585     PetscFunctionReturn(0);
2586   } else if (n == 0) {
2587     if (m) {
2588       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2589       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2590       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2591     }
2592     PetscFunctionReturn(0);
2593   }
2594 
2595   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2596   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2597   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2598   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2599 
2600   /* Get offdiagIdx[] for implicit 0.0 */
2601   ba = b->a;
2602   bi = b->i;
2603   bj = b->j;
2604   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2605   for (r = 0; r < m; r++) {
2606     ncols = bi[r+1] - bi[r];
2607     if (ncols == A->cmap->N - n) { /* Brow is dense */
2608       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2609     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2610       offdiagA[r] = 0.0;
2611 
2612       /* Find first hole in the cmap */
2613       for (j=0; j<ncols; j++) {
2614         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2615         if (col > j && j < cstart) {
2616           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2617           break;
2618         } else if (col > j + n && j >= cstart) {
2619           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2620           break;
2621         }
2622       }
2623       if (j == ncols && B->cmap->N < A->cmap->N - n) {
2624         /* a hole is outside compressed Bcols */
2625         if (ncols == 0) {
2626           if (cstart) {
2627             offdiagIdx[r] = 0;
2628           } else offdiagIdx[r] = cend;
2629         } else { /* ncols > 0 */
2630           offdiagIdx[r] = cmap[ncols-1] + 1;
2631           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2632         }
2633       }
2634     }
2635 
2636     for (j=0; j<ncols; j++) {
2637       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2638       ba++; bj++;
2639     }
2640   }
2641 
2642   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2643   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2644   for (r = 0; r < m; ++r) {
2645     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2646       a[r] = diagA[r];
2647       if (idx) idx[r] = cstart + diagIdx[r];
2648     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2649       a[r] = diagA[r];
2650       if (idx) {
2651         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2652           idx[r] = cstart + diagIdx[r];
2653         } else idx[r] = offdiagIdx[r];
2654       }
2655     } else {
2656       a[r] = offdiagA[r];
2657       if (idx) idx[r] = offdiagIdx[r];
2658     }
2659   }
2660   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2661   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2662   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2663   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2664   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2665   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2666   PetscFunctionReturn(0);
2667 }
2668 
2669 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2670 {
2671   PetscErrorCode ierr;
2672   Mat            *dummy;
2673 
2674   PetscFunctionBegin;
2675   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2676   *newmat = *dummy;
2677   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2682 {
2683   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2684   PetscErrorCode ierr;
2685 
2686   PetscFunctionBegin;
2687   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2688   A->factorerrortype = a->A->factorerrortype;
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2693 {
2694   PetscErrorCode ierr;
2695   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2696 
2697   PetscFunctionBegin;
2698   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2699   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2700   if (x->assembled) {
2701     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2702   } else {
2703     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2704   }
2705   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2711 {
2712   PetscFunctionBegin;
2713   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2714   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 /*@
2719    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2720 
2721    Collective on Mat
2722 
2723    Input Parameters:
2724 +    A - the matrix
2725 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2726 
2727  Level: advanced
2728 
2729 @*/
2730 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2731 {
2732   PetscErrorCode       ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2740 {
2741   PetscErrorCode       ierr;
2742   PetscBool            sc = PETSC_FALSE,flg;
2743 
2744   PetscFunctionBegin;
2745   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2746   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2747   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2748   if (flg) {
2749     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2750   }
2751   ierr = PetscOptionsTail();CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 
2755 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2756 {
2757   PetscErrorCode ierr;
2758   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2759   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2760 
2761   PetscFunctionBegin;
2762   if (!Y->preallocated) {
2763     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2764   } else if (!aij->nz) {
2765     PetscInt nonew = aij->nonew;
2766     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2767     aij->nonew = nonew;
2768   }
2769   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2774 {
2775   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2776   PetscErrorCode ierr;
2777 
2778   PetscFunctionBegin;
2779   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2780   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2781   if (d) {
2782     PetscInt rstart;
2783     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2784     *d += rstart;
2785 
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2791 {
2792   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2793   PetscErrorCode ierr;
2794 
2795   PetscFunctionBegin;
2796   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 /* -------------------------------------------------------------------*/
2801 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2802                                        MatGetRow_MPIAIJ,
2803                                        MatRestoreRow_MPIAIJ,
2804                                        MatMult_MPIAIJ,
2805                                 /* 4*/ MatMultAdd_MPIAIJ,
2806                                        MatMultTranspose_MPIAIJ,
2807                                        MatMultTransposeAdd_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                 /*10*/ NULL,
2812                                        NULL,
2813                                        NULL,
2814                                        MatSOR_MPIAIJ,
2815                                        MatTranspose_MPIAIJ,
2816                                 /*15*/ MatGetInfo_MPIAIJ,
2817                                        MatEqual_MPIAIJ,
2818                                        MatGetDiagonal_MPIAIJ,
2819                                        MatDiagonalScale_MPIAIJ,
2820                                        MatNorm_MPIAIJ,
2821                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2822                                        MatAssemblyEnd_MPIAIJ,
2823                                        MatSetOption_MPIAIJ,
2824                                        MatZeroEntries_MPIAIJ,
2825                                 /*24*/ MatZeroRows_MPIAIJ,
2826                                        NULL,
2827                                        NULL,
2828                                        NULL,
2829                                        NULL,
2830                                 /*29*/ MatSetUp_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        MatGetDiagonalBlock_MPIAIJ,
2834                                        NULL,
2835                                 /*34*/ MatDuplicate_MPIAIJ,
2836                                        NULL,
2837                                        NULL,
2838                                        NULL,
2839                                        NULL,
2840                                 /*39*/ MatAXPY_MPIAIJ,
2841                                        MatCreateSubMatrices_MPIAIJ,
2842                                        MatIncreaseOverlap_MPIAIJ,
2843                                        MatGetValues_MPIAIJ,
2844                                        MatCopy_MPIAIJ,
2845                                 /*44*/ MatGetRowMax_MPIAIJ,
2846                                        MatScale_MPIAIJ,
2847                                        MatShift_MPIAIJ,
2848                                        MatDiagonalSet_MPIAIJ,
2849                                        MatZeroRowsColumns_MPIAIJ,
2850                                 /*49*/ MatSetRandom_MPIAIJ,
2851                                        NULL,
2852                                        NULL,
2853                                        NULL,
2854                                        NULL,
2855                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2856                                        NULL,
2857                                        MatSetUnfactored_MPIAIJ,
2858                                        MatPermute_MPIAIJ,
2859                                        NULL,
2860                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2861                                        MatDestroy_MPIAIJ,
2862                                        MatView_MPIAIJ,
2863                                        NULL,
2864                                        NULL,
2865                                 /*64*/ NULL,
2866                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2867                                        NULL,
2868                                        NULL,
2869                                        NULL,
2870                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2871                                        MatGetRowMinAbs_MPIAIJ,
2872                                        NULL,
2873                                        NULL,
2874                                        NULL,
2875                                        NULL,
2876                                 /*75*/ MatFDColoringApply_AIJ,
2877                                        MatSetFromOptions_MPIAIJ,
2878                                        NULL,
2879                                        NULL,
2880                                        MatFindZeroDiagonals_MPIAIJ,
2881                                 /*80*/ NULL,
2882                                        NULL,
2883                                        NULL,
2884                                 /*83*/ MatLoad_MPIAIJ,
2885                                        MatIsSymmetric_MPIAIJ,
2886                                        NULL,
2887                                        NULL,
2888                                        NULL,
2889                                        NULL,
2890                                 /*89*/ NULL,
2891                                        NULL,
2892                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2893                                        NULL,
2894                                        NULL,
2895                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2896                                        NULL,
2897                                        NULL,
2898                                        NULL,
2899                                        MatBindToCPU_MPIAIJ,
2900                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2901                                        NULL,
2902                                        NULL,
2903                                        MatConjugate_MPIAIJ,
2904                                        NULL,
2905                                 /*104*/MatSetValuesRow_MPIAIJ,
2906                                        MatRealPart_MPIAIJ,
2907                                        MatImaginaryPart_MPIAIJ,
2908                                        NULL,
2909                                        NULL,
2910                                 /*109*/NULL,
2911                                        NULL,
2912                                        MatGetRowMin_MPIAIJ,
2913                                        NULL,
2914                                        MatMissingDiagonal_MPIAIJ,
2915                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2916                                        NULL,
2917                                        MatGetGhosts_MPIAIJ,
2918                                        NULL,
2919                                        NULL,
2920                                 /*119*/NULL,
2921                                        NULL,
2922                                        NULL,
2923                                        NULL,
2924                                        MatGetMultiProcBlock_MPIAIJ,
2925                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2926                                        MatGetColumnNorms_MPIAIJ,
2927                                        MatInvertBlockDiagonal_MPIAIJ,
2928                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2929                                        MatCreateSubMatricesMPI_MPIAIJ,
2930                                 /*129*/NULL,
2931                                        NULL,
2932                                        NULL,
2933                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2934                                        NULL,
2935                                 /*134*/NULL,
2936                                        NULL,
2937                                        NULL,
2938                                        NULL,
2939                                        NULL,
2940                                 /*139*/MatSetBlockSizes_MPIAIJ,
2941                                        NULL,
2942                                        NULL,
2943                                        MatFDColoringSetUp_MPIXAIJ,
2944                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2945                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2946                                 /*145*/NULL,
2947                                        NULL,
2948                                        NULL
2949 };
2950 
2951 /* ----------------------------------------------------------------------------------------*/
2952 
2953 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2954 {
2955   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2960   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2965 {
2966   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2967   PetscErrorCode ierr;
2968 
2969   PetscFunctionBegin;
2970   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2971   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2976 {
2977   Mat_MPIAIJ     *b;
2978   PetscErrorCode ierr;
2979   PetscMPIInt    size;
2980 
2981   PetscFunctionBegin;
2982   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2983   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2984   b = (Mat_MPIAIJ*)B->data;
2985 
2986 #if defined(PETSC_USE_CTABLE)
2987   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2988 #else
2989   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2990 #endif
2991   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2992   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2993   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2994 
2995   /* Because the B will have been resized we simply destroy it and create a new one each time */
2996   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2997   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2998   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2999   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
3000   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3001   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3002   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3003 
3004   if (!B->preallocated) {
3005     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3006     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3007     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3008     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3009     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3010   }
3011 
3012   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3013   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3014   B->preallocated  = PETSC_TRUE;
3015   B->was_assembled = PETSC_FALSE;
3016   B->assembled     = PETSC_FALSE;
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3021 {
3022   Mat_MPIAIJ     *b;
3023   PetscErrorCode ierr;
3024 
3025   PetscFunctionBegin;
3026   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3027   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3028   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3029   b = (Mat_MPIAIJ*)B->data;
3030 
3031 #if defined(PETSC_USE_CTABLE)
3032   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3033 #else
3034   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3035 #endif
3036   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3037   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3038   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3039 
3040   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3041   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3042   B->preallocated  = PETSC_TRUE;
3043   B->was_assembled = PETSC_FALSE;
3044   B->assembled = PETSC_FALSE;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3049 {
3050   Mat            mat;
3051   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3052   PetscErrorCode ierr;
3053 
3054   PetscFunctionBegin;
3055   *newmat = NULL;
3056   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3057   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3058   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3059   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3060   a       = (Mat_MPIAIJ*)mat->data;
3061 
3062   mat->factortype   = matin->factortype;
3063   mat->assembled    = matin->assembled;
3064   mat->insertmode   = NOT_SET_VALUES;
3065   mat->preallocated = matin->preallocated;
3066 
3067   a->size         = oldmat->size;
3068   a->rank         = oldmat->rank;
3069   a->donotstash   = oldmat->donotstash;
3070   a->roworiented  = oldmat->roworiented;
3071   a->rowindices   = NULL;
3072   a->rowvalues    = NULL;
3073   a->getrowactive = PETSC_FALSE;
3074 
3075   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3076   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3077 
3078   if (oldmat->colmap) {
3079 #if defined(PETSC_USE_CTABLE)
3080     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3081 #else
3082     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3083     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3084     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3085 #endif
3086   } else a->colmap = NULL;
3087   if (oldmat->garray) {
3088     PetscInt len;
3089     len  = oldmat->B->cmap->n;
3090     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3091     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3092     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3093   } else a->garray = NULL;
3094 
3095   /* It may happen MatDuplicate is called with a non-assembled matrix
3096      In fact, MatDuplicate only requires the matrix to be preallocated
3097      This may happen inside a DMCreateMatrix_Shell */
3098   if (oldmat->lvec) {
3099     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3100     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3101   }
3102   if (oldmat->Mvctx) {
3103     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3104     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3105   }
3106   if (oldmat->Mvctx_mpi1) {
3107     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3108     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3109   }
3110 
3111   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3112   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3113   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3114   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3115   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3116   *newmat = mat;
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3121 {
3122   PetscBool      isbinary, ishdf5;
3123   PetscErrorCode ierr;
3124 
3125   PetscFunctionBegin;
3126   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3127   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3128   /* force binary viewer to load .info file if it has not yet done so */
3129   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3130   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3131   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3132   if (isbinary) {
3133     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3134   } else if (ishdf5) {
3135 #if defined(PETSC_HAVE_HDF5)
3136     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3137 #else
3138     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3139 #endif
3140   } else {
3141     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3142   }
3143   PetscFunctionReturn(0);
3144 }
3145 
3146 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3147 {
3148   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3149   PetscInt       *rowidxs,*colidxs;
3150   PetscScalar    *matvals;
3151   PetscErrorCode ierr;
3152 
3153   PetscFunctionBegin;
3154   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3155 
3156   /* read in matrix header */
3157   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3158   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3159   M  = header[1]; N = header[2]; nz = header[3];
3160   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3161   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3162   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3163 
3164   /* set block sizes from the viewer's .info file */
3165   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3166   /* set global sizes if not set already */
3167   if (mat->rmap->N < 0) mat->rmap->N = M;
3168   if (mat->cmap->N < 0) mat->cmap->N = N;
3169   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3170   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3171 
3172   /* check if the matrix sizes are correct */
3173   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3174   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3175 
3176   /* read in row lengths and build row indices */
3177   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3178   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3179   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3180   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3181   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3182   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3183   /* read in column indices and matrix values */
3184   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3185   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3186   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3187   /* store matrix indices and values */
3188   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3189   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3190   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3191   PetscFunctionReturn(0);
3192 }
3193 
3194 /* Not scalable because of ISAllGather() unless getting all columns. */
3195 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3196 {
3197   PetscErrorCode ierr;
3198   IS             iscol_local;
3199   PetscBool      isstride;
3200   PetscMPIInt    lisstride=0,gisstride;
3201 
3202   PetscFunctionBegin;
3203   /* check if we are grabbing all columns*/
3204   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3205 
3206   if (isstride) {
3207     PetscInt  start,len,mstart,mlen;
3208     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3209     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3210     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3211     if (mstart == start && mlen-mstart == len) lisstride = 1;
3212   }
3213 
3214   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3215   if (gisstride) {
3216     PetscInt N;
3217     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3218     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3219     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3220     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3221   } else {
3222     PetscInt cbs;
3223     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3224     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3225     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3226   }
3227 
3228   *isseq = iscol_local;
3229   PetscFunctionReturn(0);
3230 }
3231 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            allocated here with PetscMalloc1() and not freed by this routine

 Notes:
   The selected columns are communicated by scattering two work vectors with the
   matrix's own scatter context (a->Mvctx) into vectors shaped like a->lvec, so
   a->lvec itself is overwritten by this routine.
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices: the inclusive prefix sum minus the local count gives the
     number of iscol entries held by lower-ranked processes */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  /* mark each selected column in x with its own index and in cmap with its position in iscol */
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d; the IS takes ownership of idx, and i is reused to carry the block size */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d: the row indices shifted by the first locally owned row */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  /* entries still holding the -1 padding belong to columns that are not in iscol */
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  /* the IS copies the first count entries, so idx can be freed right after */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* cmap1 is handed to the caller; only its first count entries were written */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3344 
3345 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3346 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3347 {
3348   PetscErrorCode ierr;
3349   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3350   Mat            M = NULL;
3351   MPI_Comm       comm;
3352   IS             iscol_d,isrow_d,iscol_o;
3353   Mat            Asub = NULL,Bsub = NULL;
3354   PetscInt       n;
3355 
3356   PetscFunctionBegin;
3357   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3358 
3359   if (call == MAT_REUSE_MATRIX) {
3360     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3361     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3362     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3363 
3364     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3365     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3366 
3367     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3368     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3369 
3370     /* Update diagonal and off-diagonal portions of submat */
3371     asub = (Mat_MPIAIJ*)(*submat)->data;
3372     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3373     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3374     if (n) {
3375       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3376     }
3377     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3378     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3379 
3380   } else { /* call == MAT_INITIAL_MATRIX) */
3381     const PetscInt *garray;
3382     PetscInt        BsubN;
3383 
3384     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3385     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3386 
3387     /* Create local submatrices Asub and Bsub */
3388     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3389     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3390 
3391     /* Create submatrix M */
3392     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3393 
3394     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3395     asub = (Mat_MPIAIJ*)M->data;
3396 
3397     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3398     n = asub->B->cmap->N;
3399     if (BsubN > n) {
3400       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3401       const PetscInt *idx;
3402       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3403       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3404 
3405       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3406       j = 0;
3407       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3408       for (i=0; i<n; i++) {
3409         if (j >= BsubN) break;
3410         while (subgarray[i] > garray[j]) j++;
3411 
3412         if (subgarray[i] == garray[j]) {
3413           idx_new[i] = idx[j++];
3414         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3415       }
3416       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3417 
3418       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3419       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3420 
3421     } else if (BsubN < n) {
3422       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3423     }
3424 
3425     ierr = PetscFree(garray);CHKERRQ(ierr);
3426     *submat = M;
3427 
3428     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3429     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3430     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3431 
3432     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3433     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3434 
3435     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3436     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3437   }
3438   PetscFunctionReturn(0);
3439 }
3440 
3441 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3442 {
3443   PetscErrorCode ierr;
3444   IS             iscol_local=NULL,isrow_d;
3445   PetscInt       csize;
3446   PetscInt       n,i,j,start,end;
3447   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3448   MPI_Comm       comm;
3449 
3450   PetscFunctionBegin;
3451   /* If isrow has same processor distribution as mat,
3452      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3453   if (call == MAT_REUSE_MATRIX) {
3454     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3455     if (isrow_d) {
3456       sameRowDist  = PETSC_TRUE;
3457       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3458     } else {
3459       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3460       if (iscol_local) {
3461         sameRowDist  = PETSC_TRUE;
3462         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3463       }
3464     }
3465   } else {
3466     /* Check if isrow has same processor distribution as mat */
3467     sameDist[0] = PETSC_FALSE;
3468     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3469     if (!n) {
3470       sameDist[0] = PETSC_TRUE;
3471     } else {
3472       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3473       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3474       if (i >= start && j < end) {
3475         sameDist[0] = PETSC_TRUE;
3476       }
3477     }
3478 
3479     /* Check if iscol has same processor distribution as mat */
3480     sameDist[1] = PETSC_FALSE;
3481     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3482     if (!n) {
3483       sameDist[1] = PETSC_TRUE;
3484     } else {
3485       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3486       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3487       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3488     }
3489 
3490     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3491     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3492     sameRowDist = tsameDist[0];
3493   }
3494 
3495   if (sameRowDist) {
3496     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3497       /* isrow and iscol have same processor distribution as mat */
3498       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3499       PetscFunctionReturn(0);
3500     } else { /* sameRowDist */
3501       /* isrow has same processor distribution as mat */
3502       if (call == MAT_INITIAL_MATRIX) {
3503         PetscBool sorted;
3504         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3505         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3506         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3507         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3508 
3509         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3510         if (sorted) {
3511           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3512           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3513           PetscFunctionReturn(0);
3514         }
3515       } else { /* call == MAT_REUSE_MATRIX */
3516         IS    iscol_sub;
3517         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3518         if (iscol_sub) {
3519           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3520           PetscFunctionReturn(0);
3521         }
3522       }
3523     }
3524   }
3525 
3526   /* General case: iscol -> iscol_local which has global size of iscol */
3527   if (call == MAT_REUSE_MATRIX) {
3528     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3529     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3530   } else {
3531     if (!iscol_local) {
3532       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3533     }
3534   }
3535 
3536   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3537   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3538 
3539   if (call == MAT_INITIAL_MATRIX) {
3540     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3541     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3542   }
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 /*@C
3547      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3548          and "off-diagonal" part of the matrix in CSR format.
3549 
3550    Collective
3551 
3552    Input Parameters:
3553 +  comm - MPI communicator
3554 .  A - "diagonal" portion of matrix
3555 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3556 -  garray - global index of B columns
3557 
3558    Output Parameter:
3559 .   mat - the matrix, with input A as its local diagonal matrix
3560    Level: advanced
3561 
3562    Notes:
3563        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3564        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3565 
3566 .seealso: MatCreateMPIAIJWithSplitArrays()
3567 @*/
3568 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3569 {
3570   PetscErrorCode ierr;
3571   Mat_MPIAIJ     *maij;
3572   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3573   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3574   PetscScalar    *oa=b->a;
3575   Mat            Bnew;
3576   PetscInt       m,n,N;
3577 
3578   PetscFunctionBegin;
3579   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3580   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3581   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3582   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3583   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3584   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3585 
3586   /* Get global columns of mat */
3587   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3588 
3589   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3590   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3591   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3592   maij = (Mat_MPIAIJ*)(*mat)->data;
3593 
3594   (*mat)->preallocated = PETSC_TRUE;
3595 
3596   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3597   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3598 
3599   /* Set A as diagonal portion of *mat */
3600   maij->A = A;
3601 
3602   nz = oi[m];
3603   for (i=0; i<nz; i++) {
3604     col   = oj[i];
3605     oj[i] = garray[col];
3606   }
3607 
3608    /* Set Bnew as off-diagonal portion of *mat */
3609   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3610   bnew        = (Mat_SeqAIJ*)Bnew->data;
3611   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3612   maij->B     = Bnew;
3613 
3614   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3615 
3616   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3617   b->free_a       = PETSC_FALSE;
3618   b->free_ij      = PETSC_FALSE;
3619   ierr = MatDestroy(&B);CHKERRQ(ierr);
3620 
3621   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3622   bnew->free_a       = PETSC_TRUE;
3623   bnew->free_ij      = PETSC_TRUE;
3624 
3625   /* condense columns of maij->B */
3626   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3627   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3628   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3629   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3630   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3635 
3636 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3637 {
3638   PetscErrorCode ierr;
3639   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3640   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3641   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3642   Mat            M,Msub,B=a->B;
3643   MatScalar      *aa;
3644   Mat_SeqAIJ     *aij;
3645   PetscInt       *garray = a->garray,*colsub,Ncols;
3646   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3647   IS             iscol_sub,iscmap;
3648   const PetscInt *is_idx,*cmap;
3649   PetscBool      allcolumns=PETSC_FALSE;
3650   MPI_Comm       comm;
3651 
3652   PetscFunctionBegin;
3653   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3654 
3655   if (call == MAT_REUSE_MATRIX) {
3656     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3657     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3658     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3659 
3660     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3661     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3662 
3663     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3664     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3665 
3666     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3667 
3668   } else { /* call == MAT_INITIAL_MATRIX) */
3669     PetscBool flg;
3670 
3671     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3672     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3673 
3674     /* (1) iscol -> nonscalable iscol_local */
3675     /* Check for special case: each processor gets entire matrix columns */
3676     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3677     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3678     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3679     if (allcolumns) {
3680       iscol_sub = iscol_local;
3681       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3682       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3683 
3684     } else {
3685       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3686       PetscInt *idx,*cmap1,k;
3687       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3688       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3689       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3690       count = 0;
3691       k     = 0;
3692       for (i=0; i<Ncols; i++) {
3693         j = is_idx[i];
3694         if (j >= cstart && j < cend) {
3695           /* diagonal part of mat */
3696           idx[count]     = j;
3697           cmap1[count++] = i; /* column index in submat */
3698         } else if (Bn) {
3699           /* off-diagonal part of mat */
3700           if (j == garray[k]) {
3701             idx[count]     = j;
3702             cmap1[count++] = i;  /* column index in submat */
3703           } else if (j > garray[k]) {
3704             while (j > garray[k] && k < Bn-1) k++;
3705             if (j == garray[k]) {
3706               idx[count]     = j;
3707               cmap1[count++] = i; /* column index in submat */
3708             }
3709           }
3710         }
3711       }
3712       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3713 
3714       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3715       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3716       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3717 
3718       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3719     }
3720 
3721     /* (3) Create sequential Msub */
3722     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3723   }
3724 
3725   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3726   aij  = (Mat_SeqAIJ*)(Msub)->data;
3727   ii   = aij->i;
3728   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3729 
3730   /*
3731       m - number of local rows
3732       Ncols - number of columns (same on all processors)
3733       rstart - first row in new global matrix generated
3734   */
3735   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3736 
3737   if (call == MAT_INITIAL_MATRIX) {
3738     /* (4) Create parallel newmat */
3739     PetscMPIInt    rank,size;
3740     PetscInt       csize;
3741 
3742     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3743     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3744 
3745     /*
3746         Determine the number of non-zeros in the diagonal and off-diagonal
3747         portions of the matrix in order to do correct preallocation
3748     */
3749 
3750     /* first get start and end of "diagonal" columns */
3751     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3752     if (csize == PETSC_DECIDE) {
3753       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3754       if (mglobal == Ncols) { /* square matrix */
3755         nlocal = m;
3756       } else {
3757         nlocal = Ncols/size + ((Ncols % size) > rank);
3758       }
3759     } else {
3760       nlocal = csize;
3761     }
3762     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3763     rstart = rend - nlocal;
3764     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3765 
3766     /* next, compute all the lengths */
3767     jj    = aij->j;
3768     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3769     olens = dlens + m;
3770     for (i=0; i<m; i++) {
3771       jend = ii[i+1] - ii[i];
3772       olen = 0;
3773       dlen = 0;
3774       for (j=0; j<jend; j++) {
3775         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3776         else dlen++;
3777         jj++;
3778       }
3779       olens[i] = olen;
3780       dlens[i] = dlen;
3781     }
3782 
3783     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3784     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3785 
3786     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3787     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3788     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3789     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3790     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3791     ierr = PetscFree(dlens);CHKERRQ(ierr);
3792 
3793   } else { /* call == MAT_REUSE_MATRIX */
3794     M    = *newmat;
3795     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3796     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3797     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3798     /*
3799          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3800        rather than the slower MatSetValues().
3801     */
3802     M->was_assembled = PETSC_TRUE;
3803     M->assembled     = PETSC_FALSE;
3804   }
3805 
3806   /* (5) Set values of Msub to *newmat */
3807   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3808   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3809 
3810   jj   = aij->j;
3811   aa   = aij->a;
3812   for (i=0; i<m; i++) {
3813     row = rstart + i;
3814     nz  = ii[i+1] - ii[i];
3815     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3816     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3817     jj += nz; aa += nz;
3818   }
3819   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3820 
3821   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3823 
3824   ierr = PetscFree(colsub);CHKERRQ(ierr);
3825 
3826   /* save Msub, iscol_sub and iscmap used in processor for next request */
3827   if (call ==  MAT_INITIAL_MATRIX) {
3828     *newmat = M;
3829     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3830     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3831 
3832     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3833     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3834 
3835     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3836     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3837 
3838     if (iscol_local) {
3839       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3840       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3841     }
3842   }
3843   PetscFunctionReturn(0);
3844 }
3845 
3846 /*
3847     Not great since it makes two copies of the submatrix, first an SeqAIJ
3848   in local and then by concatenating the local matrices the end result.
3849   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3850 
3851   Note: This requires a sequential iscol with all indices.
3852 */
3853 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3854 {
3855   PetscErrorCode ierr;
3856   PetscMPIInt    rank,size;
3857   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3858   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3859   Mat            M,Mreuse;
3860   MatScalar      *aa,*vwork;
3861   MPI_Comm       comm;
3862   Mat_SeqAIJ     *aij;
3863   PetscBool      colflag,allcolumns=PETSC_FALSE;
3864 
3865   PetscFunctionBegin;
3866   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3867   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3869 
3870   /* Check for special case: each processor gets entire matrix columns */
3871   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3872   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3873   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3874   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3875 
3876   if (call ==  MAT_REUSE_MATRIX) {
3877     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3878     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3879     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3880   } else {
3881     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3882   }
3883 
3884   /*
3885       m - number of local rows
3886       n - number of columns (same on all processors)
3887       rstart - first row in new global matrix generated
3888   */
3889   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3890   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3891   if (call == MAT_INITIAL_MATRIX) {
3892     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3893     ii  = aij->i;
3894     jj  = aij->j;
3895 
3896     /*
3897         Determine the number of non-zeros in the diagonal and off-diagonal
3898         portions of the matrix in order to do correct preallocation
3899     */
3900 
3901     /* first get start and end of "diagonal" columns */
3902     if (csize == PETSC_DECIDE) {
3903       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3904       if (mglobal == n) { /* square matrix */
3905         nlocal = m;
3906       } else {
3907         nlocal = n/size + ((n % size) > rank);
3908       }
3909     } else {
3910       nlocal = csize;
3911     }
3912     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3913     rstart = rend - nlocal;
3914     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3915 
3916     /* next, compute all the lengths */
3917     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3918     olens = dlens + m;
3919     for (i=0; i<m; i++) {
3920       jend = ii[i+1] - ii[i];
3921       olen = 0;
3922       dlen = 0;
3923       for (j=0; j<jend; j++) {
3924         if (*jj < rstart || *jj >= rend) olen++;
3925         else dlen++;
3926         jj++;
3927       }
3928       olens[i] = olen;
3929       dlens[i] = dlen;
3930     }
3931     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3932     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3933     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3934     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3935     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3936     ierr = PetscFree(dlens);CHKERRQ(ierr);
3937   } else {
3938     PetscInt ml,nl;
3939 
3940     M    = *newmat;
3941     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3942     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3943     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3944     /*
3945          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3946        rather than the slower MatSetValues().
3947     */
3948     M->was_assembled = PETSC_TRUE;
3949     M->assembled     = PETSC_FALSE;
3950   }
3951   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3952   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3953   ii   = aij->i;
3954   jj   = aij->j;
3955   aa   = aij->a;
3956   for (i=0; i<m; i++) {
3957     row   = rstart + i;
3958     nz    = ii[i+1] - ii[i];
3959     cwork = jj;     jj += nz;
3960     vwork = aa;     aa += nz;
3961     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3962   }
3963 
3964   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966   *newmat = M;
3967 
3968   /* save submatrix used in processor for next request */
3969   if (call ==  MAT_INITIAL_MATRIX) {
3970     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3971     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3972   }
3973   PetscFunctionReturn(0);
3974 }
3975 
3976 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3977 {
3978   PetscInt       m,cstart, cend,j,nnz,i,d;
3979   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3980   const PetscInt *JJ;
3981   PetscErrorCode ierr;
3982   PetscBool      nooffprocentries;
3983 
3984   PetscFunctionBegin;
3985   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3986 
3987   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3988   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3989   m      = B->rmap->n;
3990   cstart = B->cmap->rstart;
3991   cend   = B->cmap->rend;
3992   rstart = B->rmap->rstart;
3993 
3994   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3995 
3996   if (PetscDefined(USE_DEBUG)) {
3997     for (i=0; i<m; i++) {
3998       nnz = Ii[i+1]- Ii[i];
3999       JJ  = J + Ii[i];
4000       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4001       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
4002       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4003     }
4004   }
4005 
4006   for (i=0; i<m; i++) {
4007     nnz     = Ii[i+1]- Ii[i];
4008     JJ      = J + Ii[i];
4009     nnz_max = PetscMax(nnz_max,nnz);
4010     d       = 0;
4011     for (j=0; j<nnz; j++) {
4012       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4013     }
4014     d_nnz[i] = d;
4015     o_nnz[i] = nnz - d;
4016   }
4017   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4018   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4019 
4020   for (i=0; i<m; i++) {
4021     ii   = i + rstart;
4022     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4023   }
4024   nooffprocentries    = B->nooffprocentries;
4025   B->nooffprocentries = PETSC_TRUE;
4026   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4027   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4028   B->nooffprocentries = nooffprocentries;
4029 
4030   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 /*@
4035    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4036    (the default parallel PETSc format).
4037 
4038    Collective
4039 
4040    Input Parameters:
4041 +  B - the matrix
4042 .  i - the indices into j for the start of each local row (starts with zero)
4043 .  j - the column indices for each local row (starts with zero)
4044 -  v - optional values in the matrix
4045 
4046    Level: developer
4047 
4048    Notes:
4049        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4050      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4051      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4052 
4053        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4054 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown
4058 
4059 $        1 0 0
4060 $        2 0 3     P0
4061 $       -------
4062 $        4 5 6     P1
4063 $
4064 $     Process0 [P0]: rows_owned=[0,1]
4065 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4066 $        j =  {0,0,2}  [size = 3]
4067 $        v =  {1,2,3}  [size = 3]
4068 $
4069 $     Process1 [P1]: rows_owned=[2]
4070 $        i =  {0,3}    [size = nrow+1  = 1+1]
4071 $        j =  {0,1,2}  [size = 3]
4072 $        v =  {4,5,6}  [size = 3]
4073 
4074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4075           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4076 @*/
4077 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4078 {
4079   PetscErrorCode ierr;
4080 
4081   PetscFunctionBegin;
4082   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4083   PetscFunctionReturn(0);
4084 }
4085 
4086 /*@C
4087    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4088    (the default parallel PETSc format).  For good matrix assembly performance
4089    the user should preallocate the matrix storage by setting the parameters
4090    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4091    performance can be increased by more than a factor of 50.
4092 
4093    Collective
4094 
4095    Input Parameters:
4096 +  B - the matrix
4097 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4098            (same value is used for all local rows)
4099 .  d_nnz - array containing the number of nonzeros in the various rows of the
4100            DIAGONAL portion of the local submatrix (possibly different for each row)
4101            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4102            The size of this array is equal to the number of local rows, i.e 'm'.
4103            For matrices that will be factored, you must leave room for (and set)
4104            the diagonal entry even if it is zero.
4105 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4106            submatrix (same value is used for all local rows).
4107 -  o_nnz - array containing the number of nonzeros in the various rows of the
4108            OFF-DIAGONAL portion of the local submatrix (possibly different for
4109            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4110            structure. The size of this array is equal to the number
4111            of local rows, i.e 'm'.
4112 
4113    If the *_nnz parameter is given then the *_nz parameter is ignored
4114 
4115    The AIJ format (also called the Yale sparse matrix format or
4116    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4117    storage.  The stored row and column indices begin with zero.
4118    See Users-Manual: ch_mat for details.
4119 
4120    The parallel matrix is partitioned such that the first m0 rows belong to
4121    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4122    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4123 
4124    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4133 
4134    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4135 
4136    You can call MatGetInfo() to get information on how effective the preallocation was;
4137    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4138    You can also run with the option -info and look for messages with the string
4139    malloc in them to see if additional memory allocation was needed.
4140 
4141    Example usage:
4142 
4143    Consider the following 8x8 matrix with 34 non-zero values, that is
4144    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4145    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4146    as follows:
4147 
4148 .vb
4149             1  2  0  |  0  3  0  |  0  4
4150     Proc0   0  5  6  |  7  0  0  |  8  0
4151             9  0 10  | 11  0  0  | 12  0
4152     -------------------------------------
4153            13  0 14  | 15 16 17  |  0  0
4154     Proc1   0 18  0  | 19 20 21  |  0  0
4155             0  0  0  | 22 23  0  | 24  0
4156     -------------------------------------
4157     Proc2  25 26 27  |  0  0 28  | 29  0
4158            30  0  0  | 31 32 33  |  0 34
4159 .ve
4160 
4161    This can be represented as a collection of submatrices as:
4162 
4163 .vb
4164       A B C
4165       D E F
4166       G H I
4167 .ve
4168 
4169    Where the submatrices A,B,C are owned by proc0, D,E,F are
4170    owned by proc1, G,H,I are owned by proc2.
4171 
4172    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4173    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4174    The 'M','N' parameters are 8,8, and have the same values on all procs.
4175 
4176    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4177    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4178    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4179    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4182 
4183    When d_nz, o_nz parameters are specified, d_nz storage elements are
4184    allocated for every row of the local diagonal submatrix, and o_nz
4185    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4188    In this case, the values of d_nz,o_nz are:
4189 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4193 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4198 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4201    In the above case the values for d_nnz,o_nnz are:
4202 .vb
4203      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4204      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4205      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4206 .ve
4207    Here the space allocated is sum of all the above values i.e 34, and
4208    hence pre-allocation is perfect.
4209 
4210    Level: intermediate
4211 
4212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4213           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4214 @*/
4215 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4216 {
4217   PetscErrorCode ierr;
4218 
4219   PetscFunctionBegin;
4220   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4221   PetscValidType(B,1);
4222   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 /*@
     MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.
4229 
4230    Collective
4231 
4232    Input Parameters:
4233 +  comm - MPI communicator
4234 .  m - number of local rows (Cannot be PETSC_DECIDE)
4235 .  n - This value should be the same as the local size used in creating the
4236        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4237        calculated if N is given) For square matrices n is almost always m.
4238 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4239 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4240 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4241 .   j - column indices
4242 -   a - matrix values
4243 
4244    Output Parameter:
4245 .   mat - the matrix
4246 
4247    Level: intermediate
4248 
4249    Notes:
4250        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4251      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4252      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4253 
4254        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4255 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the matrix shown below, the input data expected
    on each process is given in the listing that follows it.
4259 
4260        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4261 
4262 $        1 0 0
4263 $        2 0 3     P0
4264 $       -------
4265 $        4 5 6     P1
4266 $
4267 $     Process0 [P0]: rows_owned=[0,1]
4268 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4269 $        j =  {0,0,2}  [size = 3]
4270 $        v =  {1,2,3}  [size = 3]
4271 $
4272 $     Process1 [P1]: rows_owned=[2]
4273 $        i =  {0,3}    [size = nrow+1  = 1+1]
4274 $        j =  {0,1,2}  [size = 3]
4275 $        v =  {4,5,6}  [size = 3]
4276 
4277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4278           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4279 @*/
4280 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4281 {
4282   PetscErrorCode ierr;
4283 
4284   PetscFunctionBegin;
4285   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4286   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4287   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4288   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4289   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4290   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4291   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4292   PetscFunctionReturn(0);
4293 }
4294 
4295 /*@
4296      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4297          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4298 
4299    Collective
4300 
4301    Input Parameters:
4302 +  mat - the matrix
4303 .  m - number of local rows (Cannot be PETSC_DECIDE)
4304 .  n - This value should be the same as the local size used in creating the
4305        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4306        calculated if N is given) For square matrices n is almost always m.
4307 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4308 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4309 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4310 .  J - column indices
4311 -  v - matrix values
4312 
4313    Level: intermediate
4314 
4315 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4316           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4317 @*/
4318 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4319 {
4320   PetscErrorCode ierr;
4321   PetscInt       cstart,nnz,i,j;
4322   PetscInt       *ld;
4323   PetscBool      nooffprocentries;
4324   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4325   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4326   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4327   const PetscInt *Adi = Ad->i;
4328   PetscInt       ldi,Iii,md;
4329 
4330   PetscFunctionBegin;
4331   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4332   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4333   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4334   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4335 
4336   cstart = mat->cmap->rstart;
4337   if (!Aij->ld) {
4338     /* count number of entries below block diagonal */
4339     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4340     Aij->ld = ld;
4341     for (i=0; i<m; i++) {
4342       nnz  = Ii[i+1]- Ii[i];
4343       j     = 0;
4344       while  (J[j] < cstart && j < nnz) {j++;}
4345       J    += nnz;
4346       ld[i] = j;
4347     }
4348   } else {
4349     ld = Aij->ld;
4350   }
4351 
4352   for (i=0; i<m; i++) {
4353     nnz  = Ii[i+1]- Ii[i];
4354     Iii  = Ii[i];
4355     ldi  = ld[i];
4356     md   = Adi[i+1]-Adi[i];
4357     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4358     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4359     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4360     ad  += md;
4361     ao  += nnz - md;
4362   }
4363   nooffprocentries      = mat->nooffprocentries;
4364   mat->nooffprocentries = PETSC_TRUE;
4365   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4366   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4367   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4368   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4369   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4370   mat->nooffprocentries = nooffprocentries;
4371   PetscFunctionReturn(0);
4372 }
4373 
4374 /*@C
4375    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4376    (the default parallel PETSc format).  For good matrix assembly performance
4377    the user should preallocate the matrix storage by setting the parameters
4378    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4379    performance can be increased by more than a factor of 50.
4380 
4381    Collective
4382 
4383    Input Parameters:
4384 +  comm - MPI communicator
4385 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4386            This value should be the same as the local size used in creating the
4387            y vector for the matrix-vector product y = Ax.
4388 .  n - This value should be the same as the local size used in creating the
4389        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4390        calculated if N is given) For square matrices n is almost always m.
4391 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4392 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4393 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4394            (same value is used for all local rows)
4395 .  d_nnz - array containing the number of nonzeros in the various rows of the
4396            DIAGONAL portion of the local submatrix (possibly different for each row)
4397            or NULL, if d_nz is used to specify the nonzero structure.
4398            The size of this array is equal to the number of local rows, i.e 'm'.
4399 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4400            submatrix (same value is used for all local rows).
4401 -  o_nnz - array containing the number of nonzeros in the various rows of the
4402            OFF-DIAGONAL portion of the local submatrix (possibly different for
4403            each row) or NULL, if o_nz is used to specify the nonzero
4404            structure. The size of this array is equal to the number
4405            of local rows, i.e 'm'.
4406 
4407    Output Parameter:
4408 .  A - the matrix
4409 
4410    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4411    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4412    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4413 
4414    Notes:
4415    If the *_nnz parameter is given then the *_nz parameter is ignored
4416 
4417    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4418    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4419    storage requirements for this matrix.
4420 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4424 
4425    The user MUST specify either the local or global matrix dimensions
4426    (possibly both).
4427 
4428    The parallel matrix is partitioned across processors such that the
4429    first m0 rows belong to process 0, the next m1 rows belong to
4430    process 1, the next m2 rows belong to process 2 etc.. where
4431    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4432    values corresponding to [m x N] submatrix.
4433 
4434    The columns are logically partitioned with the n0 columns belonging
4435    to 0th partition, the next n1 columns belonging to the next
4436    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4437 
4438    The DIAGONAL portion of the local submatrix on any given processor
4439    is the submatrix corresponding to the rows and columns m,n
4440    corresponding to the given processor. i.e diagonal matrix on
4441    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4442    etc. The remaining portion of the local submatrix [m x (N-n)]
4443    constitute the OFF-DIAGONAL portion. The example below better
4444    illustrates this concept.
4445 
4446    For a square global matrix we define each processor's diagonal portion
4447    to be its local rows and the corresponding columns (a square submatrix);
4448    each processor's off-diagonal portion encompasses the remainder of the
4449    local matrix (a rectangular submatrix).
4450 
4451    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4452 
4453    When calling this routine with a single process communicator, a matrix of
4454    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4455    type of communicator, use the construction mechanism
4456 .vb
4457      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4458 .ve
4459 
4460 $     MatCreate(...,&A);
4461 $     MatSetType(A,MATMPIAIJ);
4462 $     MatSetSizes(A, m,n,M,N);
4463 $     MatMPIAIJSetPreallocation(A,...);
4464 
4465    By default, this format uses inodes (identical nodes) when possible.
4466    We search for consecutive rows with the same nonzero structure, thereby
4467    reusing matrix information to achieve increased efficiency.
4468 
4469    Options Database Keys:
4470 +  -mat_no_inode  - Do not use inodes
4471 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4472 
4473 
4474 
4475    Example usage:
4476 
4477    Consider the following 8x8 matrix with 34 non-zero values, that is
4478    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4479    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4480    as follows
4481 
4482 .vb
4483             1  2  0  |  0  3  0  |  0  4
4484     Proc0   0  5  6  |  7  0  0  |  8  0
4485             9  0 10  | 11  0  0  | 12  0
4486     -------------------------------------
4487            13  0 14  | 15 16 17  |  0  0
4488     Proc1   0 18  0  | 19 20 21  |  0  0
4489             0  0  0  | 22 23  0  | 24  0
4490     -------------------------------------
4491     Proc2  25 26 27  |  0  0 28  | 29  0
4492            30  0  0  | 31 32 33  |  0 34
4493 .ve
4494 
4495    This can be represented as a collection of submatrices as
4496 
4497 .vb
4498       A B C
4499       D E F
4500       G H I
4501 .ve
4502 
4503    Where the submatrices A,B,C are owned by proc0, D,E,F are
4504    owned by proc1, G,H,I are owned by proc2.
4505 
4506    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4507    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4508    The 'M','N' parameters are 8,8, and have the same values on all procs.
4509 
4510    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4511    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4512    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4513    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4516 
4517    When d_nz, o_nz parameters are specified, d_nz storage elements are
4518    allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4521    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4522    In this case, the values of d_nz,o_nz are
4523 .vb
4524      proc0 : dnz = 2, o_nz = 2
4525      proc1 : dnz = 3, o_nz = 2
4526      proc2 : dnz = 1, o_nz = 4
4527 .ve
4528    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4531    34 values.
4532 
4533    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4535    In the above case the values for d_nnz,o_nnz are
4536 .vb
4537      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4538      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4539      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4540 .ve
4541    Here the space allocated is sum of all the above values i.e 34, and
4542    hence pre-allocation is perfect.
4543 
4544    Level: intermediate
4545 
4546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4547           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4548 @*/
4549 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4550 {
4551   PetscErrorCode ierr;
4552   PetscMPIInt    size;
4553 
4554   PetscFunctionBegin;
4555   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4556   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4557   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4558   if (size > 1) {
4559     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4560     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4561   } else {
4562     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4563     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4564   }
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 /*@C
4569   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4570 
4571   Not collective
4572 
4573   Input Parameter:
4574 . A - The MPIAIJ matrix
4575 
4576   Output Parameters:
4577 + Ad - The local diagonal block as a SeqAIJ matrix
4578 . Ao - The local off-diagonal block as a SeqAIJ matrix
4579 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4580 
  Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
4585 
4586   Level: intermediate
4587 
4588 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4589 @*/
4590 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4591 {
4592   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4593   PetscBool      flg;
4594   PetscErrorCode ierr;
4595 
4596   PetscFunctionBegin;
4597   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4598   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4599   if (Ad)     *Ad     = a->A;
4600   if (Ao)     *Ao     = a->B;
4601   if (colmap) *colmap = a->garray;
4602   PetscFunctionReturn(0);
4603 }
4604 
4605 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4606 {
4607   PetscErrorCode ierr;
4608   PetscInt       m,N,i,rstart,nnz,Ii;
4609   PetscInt       *indx;
4610   PetscScalar    *values;
4611 
4612   PetscFunctionBegin;
4613   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4614   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4615     PetscInt       *dnz,*onz,sum,bs,cbs;
4616 
4617     if (n == PETSC_DECIDE) {
4618       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4619     }
4620     /* Check sum(n) = N */
4621     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4622     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4623 
4624     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4625     rstart -= m;
4626 
4627     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4628     for (i=0; i<m; i++) {
4629       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4630       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4631       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4632     }
4633 
4634     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4635     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4636     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4637     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4638     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4639     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4640     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4641     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4642   }
4643 
4644   /* numeric phase */
4645   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4646   for (i=0; i<m; i++) {
4647     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4648     Ii   = i + rstart;
4649     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4650     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4651   }
4652   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4653   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4654   PetscFunctionReturn(0);
4655 }
4656 
4657 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4658 {
4659   PetscErrorCode    ierr;
4660   PetscMPIInt       rank;
4661   PetscInt          m,N,i,rstart,nnz;
4662   size_t            len;
4663   const PetscInt    *indx;
4664   PetscViewer       out;
4665   char              *name;
4666   Mat               B;
4667   const PetscScalar *values;
4668 
4669   PetscFunctionBegin;
4670   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4671   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4672   /* Should this be the type of the diagonal block of A? */
4673   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4674   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4675   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4676   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4677   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4678   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4679   for (i=0; i<m; i++) {
4680     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4681     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4682     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4683   }
4684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4686 
4687   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4688   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4689   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4690   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4691   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4692   ierr = PetscFree(name);CHKERRQ(ierr);
4693   ierr = MatView(B,out);CHKERRQ(ierr);
4694   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4695   ierr = MatDestroy(&B);CHKERRQ(ierr);
4696   PetscFunctionReturn(0);
4697 }
4698 
4699 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4700 {
4701   PetscErrorCode      ierr;
4702   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4703 
4704   PetscFunctionBegin;
4705   if (!merge) PetscFunctionReturn(0);
4706   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4707   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4708   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4709   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4710   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4711   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4712   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4713   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4714   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4715   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4716   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4717   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4718   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4719   ierr = PetscFree(merge);CHKERRQ(ierr);
4720   PetscFunctionReturn(0);
4721 }
4722 
4723 #include <../src/mat/utils/freespace.h>
4724 #include <petscbt.h>
4725 
4726 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4727 {
4728   PetscErrorCode      ierr;
4729   MPI_Comm            comm;
4730   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4731   PetscMPIInt         size,rank,taga,*len_s;
4732   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4733   PetscInt            proc,m;
4734   PetscInt            **buf_ri,**buf_rj;
4735   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4736   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4737   MPI_Request         *s_waits,*r_waits;
4738   MPI_Status          *status;
4739   MatScalar           *aa=a->a;
4740   MatScalar           **abuf_r,*ba_i;
4741   Mat_Merge_SeqsToMPI *merge;
4742   PetscContainer      container;
4743 
4744   PetscFunctionBegin;
4745   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4746   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4747 
4748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4749   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4750 
4751   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4752   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4753   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4754 
4755   bi     = merge->bi;
4756   bj     = merge->bj;
4757   buf_ri = merge->buf_ri;
4758   buf_rj = merge->buf_rj;
4759 
4760   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4761   owners = merge->rowmap->range;
4762   len_s  = merge->len_s;
4763 
4764   /* send and recv matrix values */
4765   /*-----------------------------*/
4766   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4767   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4768 
4769   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4770   for (proc=0,k=0; proc<size; proc++) {
4771     if (!len_s[proc]) continue;
4772     i    = owners[proc];
4773     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4774     k++;
4775   }
4776 
4777   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4778   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4779   ierr = PetscFree(status);CHKERRQ(ierr);
4780 
4781   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4782   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4783 
4784   /* insert mat values of mpimat */
4785   /*----------------------------*/
4786   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4787   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4788 
4789   for (k=0; k<merge->nrecv; k++) {
4790     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4791     nrows       = *(buf_ri_k[k]);
4792     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4793     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4794   }
4795 
4796   /* set values of ba */
4797   m = merge->rowmap->n;
4798   for (i=0; i<m; i++) {
4799     arow = owners[rank] + i;
4800     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4801     bnzi = bi[i+1] - bi[i];
4802     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4803 
4804     /* add local non-zero vals of this proc's seqmat into ba */
4805     anzi   = ai[arow+1] - ai[arow];
4806     aj     = a->j + ai[arow];
4807     aa     = a->a + ai[arow];
4808     nextaj = 0;
4809     for (j=0; nextaj<anzi; j++) {
4810       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4811         ba_i[j] += aa[nextaj++];
4812       }
4813     }
4814 
4815     /* add received vals into ba */
4816     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4817       /* i-th row */
4818       if (i == *nextrow[k]) {
4819         anzi   = *(nextai[k]+1) - *nextai[k];
4820         aj     = buf_rj[k] + *(nextai[k]);
4821         aa     = abuf_r[k] + *(nextai[k]);
4822         nextaj = 0;
4823         for (j=0; nextaj<anzi; j++) {
4824           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4825             ba_i[j] += aa[nextaj++];
4826           }
4827         }
4828         nextrow[k]++; nextai[k]++;
4829       }
4830     }
4831     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4832   }
4833   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4834   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4835 
4836   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4837   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4838   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4839   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4840   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4841   PetscFunctionReturn(0);
4842 }
4843 
4844 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4845 {
4846   PetscErrorCode      ierr;
4847   Mat                 B_mpi;
4848   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4849   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4850   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4851   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4852   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4853   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4854   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4855   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4856   MPI_Status          *status;
4857   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4858   PetscBT             lnkbt;
4859   Mat_Merge_SeqsToMPI *merge;
4860   PetscContainer      container;
4861 
4862   PetscFunctionBegin;
4863   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4864 
4865   /* make sure it is a PETSc comm */
4866   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4867   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4868   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4869 
4870   ierr = PetscNew(&merge);CHKERRQ(ierr);
4871   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4872 
4873   /* determine row ownership */
4874   /*---------------------------------------------------------*/
4875   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4876   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4877   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4878   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4879   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4880   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4881   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4882 
4883   m      = merge->rowmap->n;
4884   owners = merge->rowmap->range;
4885 
4886   /* determine the number of messages to send, their lengths */
4887   /*---------------------------------------------------------*/
4888   len_s = merge->len_s;
4889 
4890   len          = 0; /* length of buf_si[] */
4891   merge->nsend = 0;
4892   for (proc=0; proc<size; proc++) {
4893     len_si[proc] = 0;
4894     if (proc == rank) {
4895       len_s[proc] = 0;
4896     } else {
4897       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4898       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4899     }
4900     if (len_s[proc]) {
4901       merge->nsend++;
4902       nrows = 0;
4903       for (i=owners[proc]; i<owners[proc+1]; i++) {
4904         if (ai[i+1] > ai[i]) nrows++;
4905       }
4906       len_si[proc] = 2*(nrows+1);
4907       len         += len_si[proc];
4908     }
4909   }
4910 
4911   /* determine the number and length of messages to receive for ij-structure */
4912   /*-------------------------------------------------------------------------*/
4913   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4914   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4915 
4916   /* post the Irecv of j-structure */
4917   /*-------------------------------*/
4918   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4919   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4920 
4921   /* post the Isend of j-structure */
4922   /*--------------------------------*/
4923   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4924 
4925   for (proc=0, k=0; proc<size; proc++) {
4926     if (!len_s[proc]) continue;
4927     i    = owners[proc];
4928     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4929     k++;
4930   }
4931 
4932   /* receives and sends of j-structure are complete */
4933   /*------------------------------------------------*/
4934   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4935   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4936 
4937   /* send and recv i-structure */
4938   /*---------------------------*/
4939   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4940   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4941 
4942   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4943   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4944   for (proc=0,k=0; proc<size; proc++) {
4945     if (!len_s[proc]) continue;
4946     /* form outgoing message for i-structure:
4947          buf_si[0]:                 nrows to be sent
4948                [1:nrows]:           row index (global)
4949                [nrows+1:2*nrows+1]: i-structure index
4950     */
4951     /*-------------------------------------------*/
4952     nrows       = len_si[proc]/2 - 1;
4953     buf_si_i    = buf_si + nrows+1;
4954     buf_si[0]   = nrows;
4955     buf_si_i[0] = 0;
4956     nrows       = 0;
4957     for (i=owners[proc]; i<owners[proc+1]; i++) {
4958       anzi = ai[i+1] - ai[i];
4959       if (anzi) {
4960         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4961         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4962         nrows++;
4963       }
4964     }
4965     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4966     k++;
4967     buf_si += len_si[proc];
4968   }
4969 
4970   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4971   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4972 
4973   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4974   for (i=0; i<merge->nrecv; i++) {
4975     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4976   }
4977 
4978   ierr = PetscFree(len_si);CHKERRQ(ierr);
4979   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4980   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4981   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4982   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4983   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4984   ierr = PetscFree(status);CHKERRQ(ierr);
4985 
4986   /* compute a local seq matrix in each processor */
4987   /*----------------------------------------------*/
4988   /* allocate bi array and free space for accumulating nonzero column info */
4989   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4990   bi[0] = 0;
4991 
4992   /* create and initialize a linked list */
4993   nlnk = N+1;
4994   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4995 
4996   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4997   len  = ai[owners[rank+1]] - ai[owners[rank]];
4998   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4999 
5000   current_space = free_space;
5001 
5002   /* determine symbolic info for each local row */
5003   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5004 
5005   for (k=0; k<merge->nrecv; k++) {
5006     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5007     nrows       = *buf_ri_k[k];
5008     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5009     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5010   }
5011 
5012   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5013   len  = 0;
5014   for (i=0; i<m; i++) {
5015     bnzi = 0;
5016     /* add local non-zero cols of this proc's seqmat into lnk */
5017     arow  = owners[rank] + i;
5018     anzi  = ai[arow+1] - ai[arow];
5019     aj    = a->j + ai[arow];
5020     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5021     bnzi += nlnk;
5022     /* add received col data into lnk */
5023     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5024       if (i == *nextrow[k]) { /* i-th row */
5025         anzi  = *(nextai[k]+1) - *nextai[k];
5026         aj    = buf_rj[k] + *nextai[k];
5027         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5028         bnzi += nlnk;
5029         nextrow[k]++; nextai[k]++;
5030       }
5031     }
5032     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5033 
5034     /* if free space is not available, make more free space */
5035     if (current_space->local_remaining<bnzi) {
5036       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5037       nspacedouble++;
5038     }
5039     /* copy data into free space, then initialize lnk */
5040     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5041     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5042 
5043     current_space->array           += bnzi;
5044     current_space->local_used      += bnzi;
5045     current_space->local_remaining -= bnzi;
5046 
5047     bi[i+1] = bi[i] + bnzi;
5048   }
5049 
5050   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5051 
5052   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5053   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5054   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5055 
5056   /* create symbolic parallel matrix B_mpi */
5057   /*---------------------------------------*/
5058   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5059   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5060   if (n==PETSC_DECIDE) {
5061     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5062   } else {
5063     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5064   }
5065   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5066   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5067   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5068   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5069   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5070 
5071   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5072   B_mpi->assembled  = PETSC_FALSE;
5073   merge->bi         = bi;
5074   merge->bj         = bj;
5075   merge->buf_ri     = buf_ri;
5076   merge->buf_rj     = buf_rj;
5077   merge->coi        = NULL;
5078   merge->coj        = NULL;
5079   merge->owners_co  = NULL;
5080 
5081   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5082 
5083   /* attach the supporting struct to B_mpi for reuse */
5084   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5085   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5086   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5087   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5088   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5089   *mpimat = B_mpi;
5090 
5091   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5092   PetscFunctionReturn(0);
5093 }
5094 
5095 /*@C
5096       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5097                  matrices from each processor
5098 
5099     Collective
5100 
5101    Input Parameters:
5102 +    comm - the communicators the parallel matrix will live on
5103 .    seqmat - the input sequential matrices
5104 .    m - number of local rows (or PETSC_DECIDE)
5105 .    n - number of local columns (or PETSC_DECIDE)
5106 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5107 
5108    Output Parameter:
5109 .    mpimat - the parallel matrix generated
5110 
5111     Level: advanced
5112 
5113    Notes:
5114      The dimensions of the sequential matrix in each processor MUST be the same.
5115      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5116      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5117 @*/
5118 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5119 {
5120   PetscErrorCode ierr;
5121   PetscMPIInt    size;
5122 
5123   PetscFunctionBegin;
5124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5125   if (size == 1) {
5126     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5127     if (scall == MAT_INITIAL_MATRIX) {
5128       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5129     } else {
5130       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5131     }
5132     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5133     PetscFunctionReturn(0);
5134   }
5135   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5136   if (scall == MAT_INITIAL_MATRIX) {
5137     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5138   }
5139   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5140   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5141   PetscFunctionReturn(0);
5142 }
5143 
5144 /*@
5145      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5146           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5147           with MatGetSize()
5148 
5149     Not Collective
5150 
5151    Input Parameters:
5152 +    A - the matrix
5153 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5154 
5155    Output Parameter:
5156 .    A_loc - the local sequential matrix generated
5157 
5158     Level: developer
5159 
5160    Notes:
5161      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5162      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5163      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5164      modify the values of the returned A_loc.
5165 
5166 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5167 
5168 @*/
5169 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5170 {
5171   PetscErrorCode ierr;
5172   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5173   Mat_SeqAIJ     *mat,*a,*b;
5174   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5175   MatScalar      *aa,*ba,*cam;
5176   PetscScalar    *ca;
5177   PetscMPIInt    size;
5178   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5179   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5180   PetscBool      match;
5181 
5182   PetscFunctionBegin;
5183   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5184   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5185   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5186   if (size == 1) {
5187     if (scall == MAT_INITIAL_MATRIX) {
5188       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5189       *A_loc = mpimat->A;
5190     } else if (scall == MAT_REUSE_MATRIX) {
5191       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5192     }
5193     PetscFunctionReturn(0);
5194   }
5195 
5196   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5197   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5198   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5199   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5200   aa = a->a; ba = b->a;
5201   if (scall == MAT_INITIAL_MATRIX) {
5202     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5203     ci[0] = 0;
5204     for (i=0; i<am; i++) {
5205       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5206     }
5207     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5208     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5209     k    = 0;
5210     for (i=0; i<am; i++) {
5211       ncols_o = bi[i+1] - bi[i];
5212       ncols_d = ai[i+1] - ai[i];
5213       /* off-diagonal portion of A */
5214       for (jo=0; jo<ncols_o; jo++) {
5215         col = cmap[*bj];
5216         if (col >= cstart) break;
5217         cj[k]   = col; bj++;
5218         ca[k++] = *ba++;
5219       }
5220       /* diagonal portion of A */
5221       for (j=0; j<ncols_d; j++) {
5222         cj[k]   = cstart + *aj++;
5223         ca[k++] = *aa++;
5224       }
5225       /* off-diagonal portion of A */
5226       for (j=jo; j<ncols_o; j++) {
5227         cj[k]   = cmap[*bj++];
5228         ca[k++] = *ba++;
5229       }
5230     }
5231     /* put together the new matrix */
5232     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5233     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5234     /* Since these are PETSc arrays, change flags to free them as necessary. */
5235     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5236     mat->free_a  = PETSC_TRUE;
5237     mat->free_ij = PETSC_TRUE;
5238     mat->nonew   = 0;
5239   } else if (scall == MAT_REUSE_MATRIX) {
5240     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5241     ci = mat->i; cj = mat->j; cam = mat->a;
5242     for (i=0; i<am; i++) {
5243       /* off-diagonal portion of A */
5244       ncols_o = bi[i+1] - bi[i];
5245       for (jo=0; jo<ncols_o; jo++) {
5246         col = cmap[*bj];
5247         if (col >= cstart) break;
5248         *cam++ = *ba++; bj++;
5249       }
5250       /* diagonal portion of A */
5251       ncols_d = ai[i+1] - ai[i];
5252       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5253       /* off-diagonal portion of A */
5254       for (j=jo; j<ncols_o; j++) {
5255         *cam++ = *ba++; bj++;
5256       }
5257     }
5258   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5259   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5260   PetscFunctionReturn(0);
5261 }
5262 
5263 /*@C
5264      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5265 
5266     Not Collective
5267 
5268    Input Parameters:
5269 +    A - the matrix
5270 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5271 -    row, col - index sets of rows and columns to extract (or NULL)
5272 
5273    Output Parameter:
5274 .    A_loc - the local sequential matrix generated
5275 
5276     Level: developer
5277 
5278 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5279 
5280 @*/
5281 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5282 {
5283   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5284   PetscErrorCode ierr;
5285   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5286   IS             isrowa,iscola;
5287   Mat            *aloc;
5288   PetscBool      match;
5289 
5290   PetscFunctionBegin;
5291   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5292   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5293   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5294   if (!row) {
5295     start = A->rmap->rstart; end = A->rmap->rend;
5296     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5297   } else {
5298     isrowa = *row;
5299   }
5300   if (!col) {
5301     start = A->cmap->rstart;
5302     cmap  = a->garray;
5303     nzA   = a->A->cmap->n;
5304     nzB   = a->B->cmap->n;
5305     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5306     ncols = 0;
5307     for (i=0; i<nzB; i++) {
5308       if (cmap[i] < start) idx[ncols++] = cmap[i];
5309       else break;
5310     }
5311     imark = i;
5312     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5313     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5314     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5315   } else {
5316     iscola = *col;
5317   }
5318   if (scall != MAT_INITIAL_MATRIX) {
5319     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5320     aloc[0] = *A_loc;
5321   }
5322   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5323   if (!col) { /* attach global id of condensed columns */
5324     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5325   }
5326   *A_loc = aloc[0];
5327   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5328   if (!row) {
5329     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5330   }
5331   if (!col) {
5332     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5333   }
5334   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5335   PetscFunctionReturn(0);
5336 }
5337 
5338 /*
5339  * MatCreateSeqSubMatrixWithRows_Private - Create a sequential AIJ matrix P_oth from the rows of P listed in 'rows'.
5340  * A requested row could be local or remote; for every requested row all of its entries (diagonal and off-diagonal
5341  * block of the owner) are fetched. The routine is designed to be scalable in memory so that nothing is based
     * on a global size.
     *
     * Input Parameters:
     *   P    - the parallel matrix (its data is accessed as Mat_MPIAIJ)
     *   rows - global indices of the rows of P to gather
     *
     * Output Parameter:
     *   P_oth - new SeqAIJ matrix with one row per entry of 'rows'; column indices are stored as global indices of P
     *
     * Notes:
     *   Two PetscSF objects are composed with P_oth under the names "diagsf" and "offdiagsf"; they describe how the
     *   entries of the diagonal / off-diagonal block of P map into the arrays of P_oth.
     *   While the column indices are in flight, the j-arrays of both blocks of P are temporarily converted to
     *   global numbering in place and converted back before the routine returns.
5342  * */
5343 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5344 {
5345   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5346   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5347   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5348   PetscMPIInt              owner;
5349   PetscSFNode              *iremote,*oiremote;
5350   const PetscInt           *lrowindices;
5351   PetscErrorCode           ierr;
5352   PetscSF                  sf,osf;
5353   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5354   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5355   MPI_Comm                 comm;
5356   ISLocalToGlobalMapping   mapping;
5357 
5358   PetscFunctionBegin;
5359   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5360   /* plocalsize is the number of roots
5361    * nrows is the number of leaves
5362    * */
5363   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5364   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5365   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5366   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5367   for (i=0;i<nrows;i++) {
5368     /* Find a remote index and an owner for a row
5369      * The row could be local or remote
5370      * */
5371     owner = 0;
5372     lidx  = 0;
5373     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5374     iremote[i].index = lidx;
5375     iremote[i].rank  = owner;
5376   }
5377   /* Create SF to communicate how many nonzero columns for each row */
5378   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5379   /* SF will figure out the number of nonzero columns for each row, and their
5380    * offsets
5381    * */
5382   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); /* iremote is handed over with PETSC_OWN_POINTER, it is not freed here */
5383   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5384   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5385 
5386   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); /* per local row: (diag offset, off-diag offset) */
5387   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); /* per local row: (diag count, off-diag count) */
5388   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); /* per requested row: total number of entries */
5389   roffsets[0] = 0;
5390   roffsets[1] = 0;
5391   for (i=0;i<plocalsize;i++) {
5392     /* diag */
5393     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5394     /* off diag */
5395     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5396     /* compute offsets so that we know the relative location of each row */
5397     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5398     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5399   }
5400   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5401   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5402   /* 'r' means root, and 'l' means leaf; each (diag,off-diag) pair travels as one MPIU_2INT */
5403   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5404   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5405   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5406   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5407   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5408   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5409   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5410   dntotalcols = 0;
5411   ontotalcols = 0;
5412   ncol = 0;
5413   for (i=0;i<nrows;i++) {
5414     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5415     ncol = PetscMax(pnnz[i],ncol); /* ncol ends up as the largest row length */
5416     /* diag */
5417     dntotalcols += nlcols[i*2+0];
5418     /* off diag */
5419     ontotalcols += nlcols[i*2+1];
5420   }
5421   /* We do not need to figure the right number of columns
5422    * since all the calculations will be done by going through the raw data
5423    * */
5424   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5425   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5426   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5427   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5428   /* diag */
5429   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5430   /* off diag */
5431   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5432   /* diag */
5433   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5434   /* off diag */
5435   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5436   dntotalcols = 0;
5437   ontotalcols = 0;
5438   ntotalcols  = 0;
5439   for (i=0;i<nrows;i++) {
5440     owner = 0;
5441     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5442     /* Set iremote for diag matrix */
5443     for (j=0;j<nlcols[i*2+0];j++) {
5444       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5445       iremote[dntotalcols].rank    = owner;
5446       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5447       ilocal[dntotalcols++]        = ntotalcols++;
5448     }
5449     /* off diag: these entries follow the diag entries of the same row in P_oth */
5450     for (j=0;j<nlcols[i*2+1];j++) {
5451       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5452       oiremote[ontotalcols].rank    = owner;
5453       oilocal[ontotalcols++]        = ntotalcols++;
5454     }
5455   }
5456   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5457   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5458   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5459   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5460   /* P serves as roots and P_oth is leaves
5461    * Diag matrix
5462    * */
5463   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5464   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5465   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5466 
5467   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5468   /* Off diag */
5469   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5470   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5471   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5472   /* We operate on the matrix internal data for saving memory */
5473   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5474   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5475   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5476   /* Convert to global indices for diag matrix (modifies pd->j in place; undone below) */
5477   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5478   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5479   /* We want P_oth store global indices */
5480   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5481   /* Use memory scalable approach */
5482   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5483   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); /* po->j now holds global indices; mapped back below */
5484   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5485   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5486   /* Convert back to local indices */
5487   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5488   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5489   nout = 0;
5490   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5491   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); /* every index must map back to a local one */
5492   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5493   /* Exchange values */
5494   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5495   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5496   /* Stop PETSc from shrinking memory: declare every row as filled up to its allocated length */
5497   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5498   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5499   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5500   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5501   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5502   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5503   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5504   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5505   PetscFunctionReturn(0);
5506 }
5507 
5508 /*
5509  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5510  * This supports MPIAIJ and MAIJ
5511  * */
5512 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5513 {
5514   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5515   Mat_SeqAIJ            *p_oth;
5516   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5517   IS                    rows,map;
5518   PetscHMapI            hamp;
5519   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5520   MPI_Comm              comm;
5521   PetscSF               sf,osf;
5522   PetscBool             has;
5523   PetscErrorCode        ierr;
5524 
5525   PetscFunctionBegin;
5526   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5527   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5528   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5529    *  and then create a submatrix (that often is an overlapping matrix)
5530    * */
5531   if (reuse == MAT_INITIAL_MATRIX) {
5532     /* Use a hash table to figure out unique keys */
5533     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5534     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5535     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5536     count = 0;
5537     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5538     for (i=0;i<a->B->cmap->n;i++) {
5539       key  = a->garray[i]/dof;
5540       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5541       if (!has) {
5542         mapping[i] = count;
5543         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5544       } else {
5545         /* Current 'i' has the same value the previous step */
5546         mapping[i] = count-1;
5547       }
5548     }
5549     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5550     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5551     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5552     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5553     off = 0;
5554     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5555     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5556     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5557     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5558     /* In case, the matrix was already created but users want to recreate the matrix */
5559     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5560     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5561     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5562     ierr = ISDestroy(&map);CHKERRQ(ierr);
5563     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5564   } else if (reuse == MAT_REUSE_MATRIX) {
5565     /* If matrix was already created, we simply update values using SF objects
5566      * that as attached to the matrix ealier.
5567      *  */
5568     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5569     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5570     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5571     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5572     /* Update values in place */
5573     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5574     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5575     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5576     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5577   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5578   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5579   PetscFunctionReturn(0);
5580 }
5581 
5582 /*@C
5583     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5584 
5585     Collective on Mat
5586 
5587    Input Parameters:
5588 +    A,B - the matrices in mpiaij format
5589 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5590 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5591 
5592    Output Parameter:
5593 +    rowb, colb - index sets of rows and columns of B to extract
5594 -    B_seq - the sequential matrix generated
5595 
5596     Level: developer
5597 
5598 @*/
5599 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5600 {
5601   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5602   PetscErrorCode ierr;
5603   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5604   IS             isrowb,iscolb;
5605   Mat            *bseq=NULL;
5606 
5607   PetscFunctionBegin;
5608   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5609     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5610   }
5611   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5612 
5613   if (scall == MAT_INITIAL_MATRIX) {
5614     start = A->cmap->rstart;
5615     cmap  = a->garray;
5616     nzA   = a->A->cmap->n;
5617     nzB   = a->B->cmap->n;
5618     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5619     ncols = 0;
5620     for (i=0; i<nzB; i++) {  /* row < local row index */
5621       if (cmap[i] < start) idx[ncols++] = cmap[i];
5622       else break;
5623     }
5624     imark = i;
5625     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5626     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5627     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5628     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5629   } else {
5630     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5631     isrowb  = *rowb; iscolb = *colb;
5632     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5633     bseq[0] = *B_seq;
5634   }
5635   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5636   *B_seq = bseq[0];
5637   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5638   if (!rowb) {
5639     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5640   } else {
5641     *rowb = isrowb;
5642   }
5643   if (!colb) {
5644     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5645   } else {
5646     *colb = iscolb;
5647   }
5648   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5649   PetscFunctionReturn(0);
5650 }
5651 
5652 /*
5653     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5654     of the OFF-DIAGONAL portion of local A
5655 
5656     Collective on Mat
5657 
5658    Input Parameters:
5659 +    A,B - the matrices in mpiaij format
5660 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5661 
5662    Output Parameter:
5663 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5664 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5665 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5666 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5667 
5668     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5669      for this matrix. This is not desirable..
5670 
5671     Level: developer
5672 
5673 */
5674 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5675 {
5676   PetscErrorCode         ierr;
5677   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5678   Mat_SeqAIJ             *b_oth;
5679   VecScatter             ctx;
5680   MPI_Comm               comm;
5681   const PetscMPIInt      *rprocs,*sprocs;
5682   const PetscInt         *srow,*rstarts,*sstarts;
5683   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5684   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5685   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5686   MPI_Request            *rwaits = NULL,*swaits = NULL;
5687   MPI_Status             rstatus;
5688   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5689 
5690   PetscFunctionBegin;
5691   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5692   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5693 
5694   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5695     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5696   }
5697   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5698   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5699 
5700   if (size == 1) {
5701     startsj_s = NULL;
5702     bufa_ptr  = NULL;
5703     *B_oth    = NULL;
5704     PetscFunctionReturn(0);
5705   }
5706 
5707   ctx = a->Mvctx;
5708   tag = ((PetscObject)ctx)->tag;
5709 
5710   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5711   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5712   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5713   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5714   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5715   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5716   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5717 
5718   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5719   if (scall == MAT_INITIAL_MATRIX) {
5720     /* i-array */
5721     /*---------*/
5722     /*  post receives */
5723     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5724     for (i=0; i<nrecvs; i++) {
5725       rowlen = rvalues + rstarts[i]*rbs;
5726       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5727       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5728     }
5729 
5730     /* pack the outgoing message */
5731     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5732 
5733     sstartsj[0] = 0;
5734     rstartsj[0] = 0;
5735     len         = 0; /* total length of j or a array to be sent */
5736     if (nsends) {
5737       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5738       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5739     }
5740     for (i=0; i<nsends; i++) {
5741       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5742       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5743       for (j=0; j<nrows; j++) {
5744         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5745         for (l=0; l<sbs; l++) {
5746           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5747 
5748           rowlen[j*sbs+l] = ncols;
5749 
5750           len += ncols;
5751           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5752         }
5753         k++;
5754       }
5755       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5756 
5757       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5758     }
5759     /* recvs and sends of i-array are completed */
5760     i = nrecvs;
5761     while (i--) {
5762       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5763     }
5764     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5765     ierr = PetscFree(svalues);CHKERRQ(ierr);
5766 
5767     /* allocate buffers for sending j and a arrays */
5768     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5769     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5770 
5771     /* create i-array of B_oth */
5772     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5773 
5774     b_othi[0] = 0;
5775     len       = 0; /* total length of j or a array to be received */
5776     k         = 0;
5777     for (i=0; i<nrecvs; i++) {
5778       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5779       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5780       for (j=0; j<nrows; j++) {
5781         b_othi[k+1] = b_othi[k] + rowlen[j];
5782         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5783         k++;
5784       }
5785       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5786     }
5787     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5788 
5789     /* allocate space for j and a arrrays of B_oth */
5790     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5791     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5792 
5793     /* j-array */
5794     /*---------*/
5795     /*  post receives of j-array */
5796     for (i=0; i<nrecvs; i++) {
5797       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5798       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5799     }
5800 
5801     /* pack the outgoing message j-array */
5802     if (nsends) k = sstarts[0];
5803     for (i=0; i<nsends; i++) {
5804       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5805       bufJ  = bufj+sstartsj[i];
5806       for (j=0; j<nrows; j++) {
5807         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5808         for (ll=0; ll<sbs; ll++) {
5809           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5810           for (l=0; l<ncols; l++) {
5811             *bufJ++ = cols[l];
5812           }
5813           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5814         }
5815       }
5816       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5817     }
5818 
5819     /* recvs and sends of j-array are completed */
5820     i = nrecvs;
5821     while (i--) {
5822       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5823     }
5824     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5825   } else if (scall == MAT_REUSE_MATRIX) {
5826     sstartsj = *startsj_s;
5827     rstartsj = *startsj_r;
5828     bufa     = *bufa_ptr;
5829     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5830     b_otha   = b_oth->a;
5831   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5832 
5833   /* a-array */
5834   /*---------*/
5835   /*  post receives of a-array */
5836   for (i=0; i<nrecvs; i++) {
5837     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5838     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5839   }
5840 
5841   /* pack the outgoing message a-array */
5842   if (nsends) k = sstarts[0];
5843   for (i=0; i<nsends; i++) {
5844     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5845     bufA  = bufa+sstartsj[i];
5846     for (j=0; j<nrows; j++) {
5847       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5848       for (ll=0; ll<sbs; ll++) {
5849         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5850         for (l=0; l<ncols; l++) {
5851           *bufA++ = vals[l];
5852         }
5853         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5854       }
5855     }
5856     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5857   }
5858   /* recvs and sends of a-array are completed */
5859   i = nrecvs;
5860   while (i--) {
5861     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5862   }
5863   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5864   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5865 
5866   if (scall == MAT_INITIAL_MATRIX) {
5867     /* put together the new matrix */
5868     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5869 
5870     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5871     /* Since these are PETSc arrays, change flags to free them as necessary. */
5872     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5873     b_oth->free_a  = PETSC_TRUE;
5874     b_oth->free_ij = PETSC_TRUE;
5875     b_oth->nonew   = 0;
5876 
5877     ierr = PetscFree(bufj);CHKERRQ(ierr);
5878     if (!startsj_s || !bufa_ptr) {
5879       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5880       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5881     } else {
5882       *startsj_s = sstartsj;
5883       *startsj_r = rstartsj;
5884       *bufa_ptr  = bufa;
5885     }
5886   }
5887 
5888   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5889   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5890   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5891   PetscFunctionReturn(0);
5892 }
5893 
5894 /*@C
5895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5896 
5897   Not Collective
5898 
5899   Input Parameters:
5900 . A - The matrix in mpiaij format
5901 
5902   Output Parameter:
5903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5904 . colmap - A map from global column index to local index into lvec
5905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5906 
5907   Level: developer
5908 
5909 @*/
5910 #if defined(PETSC_USE_CTABLE)
5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5912 #else
5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5914 #endif
5915 {
5916   Mat_MPIAIJ *a;
5917 
5918   PetscFunctionBegin;
5919   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5920   PetscValidPointer(lvec, 2);
5921   PetscValidPointer(colmap, 3);
5922   PetscValidPointer(multScatter, 4);
5923   a = (Mat_MPIAIJ*) A->data;
5924   if (lvec) *lvec = a->lvec;
5925   if (colmap) *colmap = a->colmap;
5926   if (multScatter) *multScatter = a->Mvctx;
5927   PetscFunctionReturn(0);
5928 }
5929 
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5933 #if defined(PETSC_HAVE_MKL_SPARSE)
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5935 #endif
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5938 #if defined(PETSC_HAVE_ELEMENTAL)
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5940 #endif
5941 #if defined(PETSC_HAVE_SCALAPACK)
5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5943 #endif
5944 #if defined(PETSC_HAVE_HYPRE)
5945 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5946 #endif
5947 #if defined(PETSC_HAVE_CUDA)
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5951 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5952 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5953 
5954 /*
5955     Computes A*B as (B'*A')' since computing A*B directly is untenable
5956 
5957                n                       p                          p
5958         [             ]       [             ]         [                 ]
5959       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5960         [             ]       [             ]         [                 ]
5961 
5962 */
5963 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5964 {
5965   PetscErrorCode ierr;
5966   Mat            At,Bt,Ct;
5967 
5968   PetscFunctionBegin;
5969   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5970   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5971   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5972   ierr = MatDestroy(&At);CHKERRQ(ierr);
5973   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5974   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5975   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5976   PetscFunctionReturn(0);
5977 }
5978 
5979 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5980 {
5981   PetscErrorCode ierr;
5982   PetscBool      cisdense;
5983 
5984   PetscFunctionBegin;
5985   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5986   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5987   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5988   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5989   if (!cisdense) {
5990     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5991   }
5992   ierr = MatSetUp(C);CHKERRQ(ierr);
5993 
5994   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5995   PetscFunctionReturn(0);
5996 }
5997 
5998 /* ----------------------------------------------------------------*/
5999 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6000 {
6001   Mat_Product *product = C->product;
6002   Mat         A = product->A,B=product->B;
6003 
6004   PetscFunctionBegin;
6005   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6006     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6007 
6008   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6009   C->ops->productsymbolic = MatProductSymbolic_AB;
6010   PetscFunctionReturn(0);
6011 }
6012 
6013 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6014 {
6015   PetscErrorCode ierr;
6016   Mat_Product    *product = C->product;
6017 
6018   PetscFunctionBegin;
6019   if (product->type == MATPRODUCT_AB) {
6020     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6021   }
6022   PetscFunctionReturn(0);
6023 }
6024 /* ----------------------------------------------------------------*/
6025 
6026 /*MC
6027    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6028 
6029    Options Database Keys:
6030 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6031 
6032    Level: beginner
6033 
6034    Notes:
6035     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6036     in this case the values associated with the rows and columns one passes in are set to zero
6037     in the matrix
6038 
6039     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6040     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6041 
6042 .seealso: MatCreateAIJ()
6043 M*/
6044 
6045 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6046 {
6047   Mat_MPIAIJ     *b;
6048   PetscErrorCode ierr;
6049   PetscMPIInt    size;
6050 
6051   PetscFunctionBegin;
6052   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6053 
6054   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6055   B->data       = (void*)b;
6056   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6057   B->assembled  = PETSC_FALSE;
6058   B->insertmode = NOT_SET_VALUES;
6059   b->size       = size;
6060 
6061   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6062 
6063   /* build cache for off array entries formed */
6064   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6065 
6066   b->donotstash  = PETSC_FALSE;
6067   b->colmap      = NULL;
6068   b->garray      = NULL;
6069   b->roworiented = PETSC_TRUE;
6070 
6071   /* stuff used for matrix vector multiply */
6072   b->lvec  = NULL;
6073   b->Mvctx = NULL;
6074 
6075   /* stuff for MatGetRow() */
6076   b->rowindices   = NULL;
6077   b->rowvalues    = NULL;
6078   b->getrowactive = PETSC_FALSE;
6079 
6080   /* flexible pointer used in CUSP/CUSPARSE classes */
6081   b->spptr = NULL;
6082 
6083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6093 #if defined(PETSC_HAVE_MKL_SPARSE)
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6095 #endif
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6099 #if defined(PETSC_HAVE_ELEMENTAL)
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6101 #endif
6102 #if defined(PETSC_HAVE_SCALAPACK)
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6104 #endif
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6107 #if defined(PETSC_HAVE_HYPRE)
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6110 #endif
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6113   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6114   PetscFunctionReturn(0);
6115 }
6116 
6117 /*@C
6118      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6119          and "off-diagonal" part of the matrix in CSR format.
6120 
6121    Collective
6122 
6123    Input Parameters:
6124 +  comm - MPI communicator
6125 .  m - number of local rows (Cannot be PETSC_DECIDE)
6126 .  n - This value should be the same as the local size used in creating the
6127        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6128        calculated if N is given) For square matrices n is almost always m.
6129 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6130 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6131 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6132 .   j - column indices
6133 .   a - matrix values
6134 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6135 .   oj - column indices
6136 -   oa - matrix values
6137 
6138    Output Parameter:
6139 .   mat - the matrix
6140 
6141    Level: advanced
6142 
6143    Notes:
6144        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6145        must free the arrays once the matrix has been destroyed and not before.
6146 
6147        The i and j indices are 0 based
6148 
6149        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6150 
6151        This sets local rows and cannot be used to set off-processor values.
6152 
6153        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6154        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6155        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6156        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6157        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6158        communication if it is known that only local entries will be set.
6159 
6160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6161           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6162 @*/
6163 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6164 {
6165   PetscErrorCode ierr;
6166   Mat_MPIAIJ     *maij;
6167 
6168   PetscFunctionBegin;
6169   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6170   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6171   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6172   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6173   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6174   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6175   maij = (Mat_MPIAIJ*) (*mat)->data;
6176 
6177   (*mat)->preallocated = PETSC_TRUE;
6178 
6179   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6180   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6181 
6182   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6183   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6184 
6185   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6186   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6187   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6188   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6189 
6190   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6191   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6192   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6193   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6194   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6195   PetscFunctionReturn(0);
6196 }
6197 
6198 /*
6199     Special version for direct calls from Fortran
6200 */
6201 #include <petsc/private/fortranimpl.h>
6202 
/* The stub below returns void, so redefine the error macros it uses in terms of CHKERRABORT */
#undef CHKERRQ
#define CHKERRQ(errcode) CHKERRABORT(PETSC_COMM_WORLD,errcode)
#undef SETERRQ2
#define SETERRQ2(comm,errcode,b,c,d) CHKERRABORT(comm,errcode)
#undef SETERRQ3
#define SETERRQ3(comm,errcode,b,c,d,e) CHKERRABORT(comm,errcode)
#undef SETERRQ
#define SETERRQ(c,errcode,b) CHKERRABORT(c,errcode)

/* map the stub name onto the symbol spelling selected by the Fortran configuration macros; otherwise the name is used as written */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
6219 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6220 {
6221   Mat            mat  = *mmat;
6222   PetscInt       m    = *mm, n = *mn;
6223   InsertMode     addv = *maddv;
6224   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6225   PetscScalar    value;
6226   PetscErrorCode ierr;
6227 
6228   MatCheckPreallocated(mat,1);
6229   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6230   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6231   {
6232     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6233     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6234     PetscBool roworiented = aij->roworiented;
6235 
6236     /* Some Variables required in the macro */
6237     Mat        A                    = aij->A;
6238     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6239     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6240     MatScalar  *aa                  = a->a;
6241     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6242     Mat        B                    = aij->B;
6243     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6244     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6245     MatScalar  *ba                  = b->a;
6246     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6247      * cannot use "#if defined" inside a macro. */
6248     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6249 
6250     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6251     PetscInt  nonew = a->nonew;
6252     MatScalar *ap1,*ap2;
6253 
6254     PetscFunctionBegin;
6255     for (i=0; i<m; i++) {
6256       if (im[i] < 0) continue;
6257       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6258       if (im[i] >= rstart && im[i] < rend) {
6259         row      = im[i] - rstart;
6260         lastcol1 = -1;
6261         rp1      = aj + ai[row];
6262         ap1      = aa + ai[row];
6263         rmax1    = aimax[row];
6264         nrow1    = ailen[row];
6265         low1     = 0;
6266         high1    = nrow1;
6267         lastcol2 = -1;
6268         rp2      = bj + bi[row];
6269         ap2      = ba + bi[row];
6270         rmax2    = bimax[row];
6271         nrow2    = bilen[row];
6272         low2     = 0;
6273         high2    = nrow2;
6274 
6275         for (j=0; j<n; j++) {
6276           if (roworiented) value = v[i*n+j];
6277           else value = v[i+j*m];
6278           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6279           if (in[j] >= cstart && in[j] < cend) {
6280             col = in[j] - cstart;
6281             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6282 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6283             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6284 #endif
6285           } else if (in[j] < 0) continue;
6286           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6287             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6288             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6289           } else {
6290             if (mat->was_assembled) {
6291               if (!aij->colmap) {
6292                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6293               }
6294 #if defined(PETSC_USE_CTABLE)
6295               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6296               col--;
6297 #else
6298               col = aij->colmap[in[j]] - 1;
6299 #endif
6300               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6301                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6302                 col  =  in[j];
6303                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6304                 B        = aij->B;
6305                 b        = (Mat_SeqAIJ*)B->data;
6306                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6307                 rp2      = bj + bi[row];
6308                 ap2      = ba + bi[row];
6309                 rmax2    = bimax[row];
6310                 nrow2    = bilen[row];
6311                 low2     = 0;
6312                 high2    = nrow2;
6313                 bm       = aij->B->rmap->n;
6314                 ba       = b->a;
6315                 inserted = PETSC_FALSE;
6316               }
6317             } else col = in[j];
6318             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6319 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
6320             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6321 #endif
6322           }
6323         }
6324       } else if (!aij->donotstash) {
6325         if (roworiented) {
6326           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6327         } else {
6328           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6329         }
6330       }
6331     }
6332   }
6333   PetscFunctionReturnVoid();
6334 }
6335