xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b0e5478f48a955d59d1e0cffcab5e7e5d441fce2)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ type also automatically switches over to using inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
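/*
   A minimal usage sketch (illustrative only, not taken from a PETSc example): comm, A,
   and the sizes M and N are assumed to be set up by the caller, and the per-row nonzero
   estimates below are placeholders. Calling both preallocation routines lets the same
   code run on one or many processes, as recommended above.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/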
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
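/*
   A small sketch (illustrative only) of selecting this format at run time; comm, A,
   M and N are assumed to be set up by the caller. MatSetFromOptions() honors
   -mat_type aijcrl, so the same code also runs with the plain "aij" type.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/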
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
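/*
   Illustrative call pattern (a sketch under assumptions, not copied from a caller):
   mlocal is the number of rows this process is to own and gmat is the square
   sequential AIJ matrix whose copy on process 0 supplies the data.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/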
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 holds an order-N integer array) but access is fast.
426 */
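/*
   Illustrative example (made-up values): if garray = {3,17,42}, local off-diagonal
   columns 0,1,2 of B correspond to global columns 3, 17 and 42. The colmap built
   below inverts this, storing i+1 so that 0 can mean "not present": colmap[3] = 1,
   colmap[17] = 2, colmap[42] = 3 in the array variant; the PETSC_USE_CTABLE variant
   additionally shifts the key by one because PetscTable keys must be positive.
*/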
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
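/*
   The two macros below insert (or add to) a single entry in the local diagonal block
   A (rp1/ap1) and the off-diagonal block B (rp2/ap2). Each narrows the search window
   with a short binary search, finishes with a linear scan, and then either updates an
   existing entry, silently drops the insertion (a zero value with ignorezeroentries
   set, or nonew == 1), raises an error (nonew == -1), or reallocates the row and
   shifts the later entries up to make room for the new nonzero.
*/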
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           goto a_noinsert; \
468         } \
469       }  \
470       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
471       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
472       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
473       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
474       N = nrow1++ - 1; a->nz++; high1++; \
475       /* shift up all the later entries in this row */ \
476       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
477       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
478       rp1[_i] = col;  \
479       ap1[_i] = value;  \
480       A->nonzerostate++;\
481       a_noinsert: ; \
482       ailen[row] = nrow1; \
483 }
484 
485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
486   { \
487     if (col <= lastcol2) low2 = 0;                        \
488     else high2 = nrow2;                                   \
489     lastcol2 = col;                                       \
490     while (high2-low2 > 5) {                              \
491       t = (low2+high2)/2;                                 \
492       if (rp2[t] > col) high2 = t;                        \
493       else             low2  = t;                         \
494     }                                                     \
495     for (_i=low2; _i<high2; _i++) {                       \
496       if (rp2[_i] > col) break;                           \
497       if (rp2[_i] == col) {                               \
498         if (addv == ADD_VALUES) {                         \
499           ap2[_i] += value;                               \
500           (void)PetscLogFlops(1.0);                       \
501         }                                                 \
502         else                    ap2[_i] = value;          \
503         goto b_noinsert;                                  \
504       }                                                   \
505     }                                                     \
506     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
507     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
508     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
509     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
510     N = nrow2++ - 1; b->nz++; high2++;                    \
511     /* shift up all the later entries in this row */      \
512     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
513     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
514     rp2[_i] = col;                                        \
515     ap2[_i] = value;                                      \
516     B->nonzerostate++;                                    \
517     b_noinsert: ;                                         \
518     bilen[row] = nrow2;                                   \
519   }
520 
521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
522 {
523   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
524   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
525   PetscErrorCode ierr;
526   PetscInt       l,*garray = mat->garray,diag;
527 
528   PetscFunctionBegin;
529   /* code only works for square matrices A */
530 
531   /* find size of row to the left of the diagonal part */
532   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
533   row  = row - diag;
534   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
535     if (garray[b->j[b->i[row]+l]] > diag) break;
536   }
537   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
538 
539   /* diagonal part */
540   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
541 
542   /* right of diagonal part */
543   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
548 {
549   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
550   PetscScalar    value = 0.0;
551   PetscErrorCode ierr;
552   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
554   PetscBool      roworiented = aij->roworiented;
555 
556   /* Some Variables required in the macro */
557   Mat        A                 = aij->A;
558   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
559   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
560   MatScalar  *aa               = a->a;
561   PetscBool  ignorezeroentries = a->ignorezeroentries;
562   Mat        B                 = aij->B;
563   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
564   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
565   MatScalar  *ba               = b->a;
566 
567   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
568   PetscInt  nonew;
569   MatScalar *ap1,*ap2;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (im[i] < 0) continue;
574 #if defined(PETSC_USE_DEBUG)
575     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
576 #endif
577     if (im[i] >= rstart && im[i] < rend) {
578       row      = im[i] - rstart;
579       lastcol1 = -1;
580       rp1      = aj + ai[row];
581       ap1      = aa + ai[row];
582       rmax1    = aimax[row];
583       nrow1    = ailen[row];
584       low1     = 0;
585       high1    = nrow1;
586       lastcol2 = -1;
587       rp2      = bj + bi[row];
588       ap2      = ba + bi[row];
589       rmax2    = bimax[row];
590       nrow2    = bilen[row];
591       low2     = 0;
592       high2    = nrow2;
593 
594       for (j=0; j<n; j++) {
595         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
596         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
597         if (in[j] >= cstart && in[j] < cend) {
598           col   = in[j] - cstart;
599           nonew = a->nonew;
600           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
601         } else if (in[j] < 0) continue;
602 #if defined(PETSC_USE_DEBUG)
603         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
604 #endif
605         else {
606           if (mat->was_assembled) {
607             if (!aij->colmap) {
608               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609             }
610 #if defined(PETSC_USE_CTABLE)
611             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
612             col--;
613 #else
614             col = aij->colmap[in[j]] - 1;
615 #endif
616             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
617               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
618               col  =  in[j];
619               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
620               B     = aij->B;
621               b     = (Mat_SeqAIJ*)B->data;
622               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
623               rp2   = bj + bi[row];
624               ap2   = ba + bi[row];
625               rmax2 = bimax[row];
626               nrow2 = bilen[row];
627               low2  = 0;
628               high2 = nrow2;
629               bm    = aij->B->rmap->n;
630               ba    = b->a;
631             } else if (col < 0) {
632               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
633                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
634               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
635             }
636           } else col = in[j];
637           nonew = b->nonew;
638           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
639         }
640       }
641     } else {
642       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
643       if (!aij->donotstash) {
644         mat->assembled = PETSC_FALSE;
645         if (roworiented) {
646           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
647         } else {
648           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
649         }
650       }
651     }
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
660 */
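/*
   Worked example (made-up values): with cstart = 5 and cend = 10, a row with global
   columns {1, 7, 9, 12} is split into the diagonal part {7-5, 9-5} = {2, 4}, stored
   as local indices in aj with ailen[row] = 2, and the off-diagonal part {1, 12},
   stored with global indices in bj with bilen[row] = 2.
*/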
661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
662 {
663   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
664   Mat            A           = aij->A; /* diagonal part of the matrix */
665   Mat            B           = aij->B; /* offdiagonal part of the matrix */
666   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
667   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
668   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
669   PetscInt       *ailen      = a->ilen,*aj = a->j;
670   PetscInt       *bilen      = b->ilen,*bj = b->j;
671   PetscInt       am          = aij->A->rmap->n,j;
672   PetscInt       diag_so_far = 0,dnz;
673   PetscInt       offd_so_far = 0,onz;
674 
675   PetscFunctionBegin;
676   /* Iterate over all rows of the matrix */
677   for (j=0; j<am; j++) {
678     dnz = onz = 0;
679     /*  Iterate over all non-zero columns of the current row */
680     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
681       /* If column is in the diagonal */
682       if (mat_j[col] >= cstart && mat_j[col] < cend) {
683         aj[diag_so_far++] = mat_j[col] - cstart;
684         dnz++;
685       } else { /* off-diagonal entries */
686         bj[offd_so_far++] = mat_j[col];
687         onz++;
688       }
689     }
690     ailen[j] = dnz;
691     bilen[j] = onz;
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 /*
697     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
698     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
699     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
700     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
701     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
702 */
703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
704 {
705   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
706   Mat            A      = aij->A; /* diagonal part of the matrix */
707   Mat            B      = aij->B; /* offdiagonal part of the matrix */
708   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
709   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
710   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
711   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
712   PetscInt       *ailen = a->ilen,*aj = a->j;
713   PetscInt       *bilen = b->ilen,*bj = b->j;
714   PetscInt       am     = aij->A->rmap->n,j;
715   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
716   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
717   PetscScalar    *aa = a->a,*ba = b->a;
718 
719   PetscFunctionBegin;
720   /* Iterate over all rows of the matrix */
721   for (j=0; j<am; j++) {
722     dnz_row = onz_row = 0;
723     rowstart_offd = full_offd_i[j];
724     rowstart_diag = full_diag_i[j];
725     /*  Iterate over all non-zero columns of the current row */
726     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
727       /* If column is in the diagonal */
728       if (mat_j[col] >= cstart && mat_j[col] < cend) {
729         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
730         aa[rowstart_diag+dnz_row] = mat_a[col];
731         dnz_row++;
732       } else { /* off-diagonal entries */
733         bj[rowstart_offd+onz_row] = mat_j[col];
734         ba[rowstart_offd+onz_row] = mat_a[col];
735         onz_row++;
736       }
737     }
738     ailen[j] = dnz_row;
739     bilen[j] = onz_row;
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
745 {
746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
749   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
750 
751   PetscFunctionBegin;
752   for (i=0; i<m; i++) {
753     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
754     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
755     if (idxm[i] >= rstart && idxm[i] < rend) {
756       row = idxm[i] - rstart;
757       for (j=0; j<n; j++) {
758         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
759         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
760         if (idxn[j] >= cstart && idxn[j] < cend) {
761           col  = idxn[j] - cstart;
762           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
763         } else {
764           if (!aij->colmap) {
765             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
766           }
767 #if defined(PETSC_USE_CTABLE)
768           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
769           col--;
770 #else
771           col = aij->colmap[idxn[j]] - 1;
772 #endif
773           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
774           else {
775             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776           }
777         }
778       }
779     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
785 
786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
787 {
788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
789   PetscErrorCode ierr;
790   PetscInt       nstash,reallocs;
791 
792   PetscFunctionBegin;
793   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
794 
795   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
796   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
797   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
805   PetscErrorCode ierr;
806   PetscMPIInt    n;
807   PetscInt       i,j,rstart,ncols,flg;
808   PetscInt       *row,*col;
809   PetscBool      other_disassembled;
810   PetscScalar    *val;
811 
812   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
813 
814   PetscFunctionBegin;
815   if (!aij->donotstash && !mat->nooffprocentries) {
816     while (1) {
817       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
818       if (!flg) break;
819 
820       for (i=0; i<n; ) {
821         /* Now identify the consecutive vals belonging to the same row */
822         for (j=i,rstart=row[j]; j<n; j++) {
823           if (row[j] != rstart) break;
824         }
825         if (j < n) ncols = j-i;
826         else       ncols = n-i;
827         /* Now assemble all these values with a single function call */
828         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
829 
830         i = j;
831       }
832     }
833     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
834   }
835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
836   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
837 #endif
838   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
839   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
840 
841   /* determine if any processor has disassembled; if so, we must
842      also disassemble ourselves so that we may reassemble. */
843   /*
844      if the nonzero structure of the submatrix B cannot change then we know that
845      no processor disassembled, and thus we can skip this step
846   */
847   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
848     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
849     if (mat->was_assembled && !other_disassembled) {
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
852 #endif
853       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
854     }
855   }
856   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
857     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
858   }
859   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
861   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
862 #endif
863   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
864   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
865 
866   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
867 
868   aij->rowvalues = 0;
869 
870   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
871   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
872 
873   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
874   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
875     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
876     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
877   }
878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
879   mat->offloadmask = PETSC_OFFLOAD_BOTH;
880 #endif
881   PetscFunctionReturn(0);
882 }
883 
884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
885 {
886   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
891   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
896 {
897   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
898   PetscObjectState sA, sB;
899   PetscInt        *lrows;
900   PetscInt         r, len;
901   PetscBool        cong, lch, gch;
902   PetscErrorCode   ierr;
903 
904   PetscFunctionBegin;
905   /* get locally owned rows */
906   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
907   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
908   /* fix right hand side if needed */
909   if (x && b) {
910     const PetscScalar *xx;
911     PetscScalar       *bb;
912 
913     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
914     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
916     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
917     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
918     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
919   }
920 
921   sA = mat->A->nonzerostate;
922   sB = mat->B->nonzerostate;
923 
924   if (diag != 0.0 && cong) {
925     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
926     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
927   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
928     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
929     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
930     PetscInt   nnwA, nnwB;
931     PetscBool  nnzA, nnzB;
932 
933     nnwA = aijA->nonew;
934     nnwB = aijB->nonew;
935     nnzA = aijA->keepnonzeropattern;
936     nnzB = aijB->keepnonzeropattern;
937     if (!nnzA) {
938       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
939       aijA->nonew = 0;
940     }
941     if (!nnzB) {
942       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
943       aijB->nonew = 0;
944     }
945     /* Must zero here before the next loop */
946     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
947     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
948     for (r = 0; r < len; ++r) {
949       const PetscInt row = lrows[r] + A->rmap->rstart;
950       if (row >= A->cmap->N) continue;
951       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
952     }
953     aijA->nonew = nnwA;
954     aijB->nonew = nnwB;
955   } else {
956     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
957     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958   }
959   ierr = PetscFree(lrows);CHKERRQ(ierr);
960   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962 
963   /* reduce nonzerostate */
964   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
965   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
966   if (gch) A->nonzerostate++;
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
971 {
972   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode    ierr;
974   PetscMPIInt       n = A->rmap->n;
975   PetscInt          i,j,r,m,p = 0,len = 0;
976   PetscInt          *lrows,*owners = A->rmap->range;
977   PetscSFNode       *rrows;
978   PetscSF           sf;
979   const PetscScalar *xx;
980   PetscScalar       *bb,*mask;
981   Vec               xmask,lmask;
982   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
983   const PetscInt    *aj, *ii,*ridx;
984   PetscScalar       *aa;
985 
986   PetscFunctionBegin;
987   /* Create SF where leaves are input rows and roots are owned rows */
988   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
989   for (r = 0; r < n; ++r) lrows[r] = -1;
990   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
991   for (r = 0; r < N; ++r) {
992     const PetscInt idx   = rows[r];
993     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
994     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
995       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
996     }
997     rrows[r].rank  = p;
998     rrows[r].index = rows[r] - owners[p];
999   }
1000   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1001   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1002   /* Collect flags for rows to be zeroed */
1003   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1004   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1005   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1006   /* Compress and put in row numbers */
1007   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1008   /* zero diagonal part of matrix */
1009   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1010   /* handle off diagonal part of matrix */
1011   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1012   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1013   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1014   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1015   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1016   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1017   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1019   if (x && b) { /* this code is buggy when the row and column layout don't match */
1020     PetscBool cong;
1021 
1022     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1023     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1024     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1027     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1028   }
1029   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1030   /* remove zeroed rows of off diagonal matrix */
1031   ii = aij->i;
1032   for (i=0; i<len; i++) {
1033     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1034   }
1035   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1036   if (aij->compressedrow.use) {
1037     m    = aij->compressedrow.nrows;
1038     ii   = aij->compressedrow.i;
1039     ridx = aij->compressedrow.rindex;
1040     for (i=0; i<m; i++) {
1041       n  = ii[i+1] - ii[i];
1042       aj = aij->j + ii[i];
1043       aa = aij->a + ii[i];
1044 
1045       for (j=0; j<n; j++) {
1046         if (PetscAbsScalar(mask[*aj])) {
1047           if (b) bb[*ridx] -= *aa*xx[*aj];
1048           *aa = 0.0;
1049         }
1050         aa++;
1051         aj++;
1052       }
1053       ridx++;
1054     }
1055   } else { /* do not use compressed row format */
1056     m = l->B->rmap->n;
1057     for (i=0; i<m; i++) {
1058       n  = ii[i+1] - ii[i];
1059       aj = aij->j + ii[i];
1060       aa = aij->a + ii[i];
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[i] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069     }
1070   }
1071   if (x && b) {
1072     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1073     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1074   }
1075   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1076   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1077   ierr = PetscFree(lrows);CHKERRQ(ierr);
1078 
1079   /* only change matrix nonzero state if pattern was allowed to be changed */
1080   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1081     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1082     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt       nt;
1092   VecScatter     Mvctx = a->Mvctx;
1093 
1094   PetscFunctionBegin;
1095   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1096   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1097 
1098   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1100   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1101   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1116 {
1117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1118   PetscErrorCode ierr;
1119   VecScatter     Mvctx = a->Mvctx;
1120 
1121   PetscFunctionBegin;
1122   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1123   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1124   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1125   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   /* do nondiagonal part */
1137   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1138   /* do local part */
1139   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140   /* add partial results together */
1141   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1147 {
1148   MPI_Comm       comm;
1149   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1150   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1151   IS             Me,Notme;
1152   PetscErrorCode ierr;
1153   PetscInt       M,N,first,last,*notme,i;
1154   PetscBool      lf;
1155   PetscMPIInt    size;
1156 
1157   PetscFunctionBegin;
1158   /* Easy test: symmetric diagonal block */
1159   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1160   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1161   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1162   if (!*f) PetscFunctionReturn(0);
1163   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1164   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1165   if (size == 1) PetscFunctionReturn(0);
1166 
1167   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1168   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1169   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1170   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1171   for (i=0; i<first; i++) notme[i] = i;
1172   for (i=last; i<M; i++) notme[i-last+first] = i;
1173   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1174   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1175   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1176   Aoff = Aoffs[0];
1177   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1178   Boff = Boffs[0];
1179   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1180   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1182   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1184   ierr = PetscFree(notme);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1189 {
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   /* do nondiagonal part */
1204   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1205   /* do local part */
1206   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1207   /* add partial results together */
1208   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1218 {
1219   PetscErrorCode ierr;
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221 
1222   PetscFunctionBegin;
1223   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1224   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1225   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1241 {
1242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246 #if defined(PETSC_USE_LOG)
1247   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1248 #endif
1249   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1250   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1251   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1252   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1253 #if defined(PETSC_USE_CTABLE)
1254   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1255 #else
1256   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1257 #endif
1258   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1259   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1260   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1261   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1262   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1263   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1264   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1265 
1266   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1275 #if defined(PETSC_HAVE_ELEMENTAL)
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1277 #endif
1278 #if defined(PETSC_HAVE_HYPRE)
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1281 #endif
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1288 {
1289   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1290   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1291   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1292   PetscErrorCode ierr;
1293   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1294   int            fd;
1295   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1296   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1297   PetscScalar    *column_values;
1298   PetscInt       message_count,flowcontrolcount;
1299   FILE           *file;
1300 
1301   PetscFunctionBegin;
1302   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1303   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1304   nz   = A->nz + B->nz;
1305   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1306   if (!rank) {
1307     header[0] = MAT_FILE_CLASSID;
1308     header[1] = mat->rmap->N;
1309     header[2] = mat->cmap->N;
1310 
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1312     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     /* get largest number of rows any processor has */
1314     rlen  = mat->rmap->n;
1315     range = mat->rmap->range;
1316     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1317   } else {
1318     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1319     rlen = mat->rmap->n;
1320   }
1321 
1322   /* load up the local row counts */
1323   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1324   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1339     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* this processor needs as much space as the processor with the most nonzeros */
1346   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1348   cnt   = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ((col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1355     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1356   }
1357   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1358 
1359   /* store the column indices to the file */
1360   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1361   if (!rank) {
1362     MPI_Status status;
1363     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1364     for (i=1; i<size; i++) {
1365       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1366       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1367       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1368       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1370     }
1371     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1372   } else {
1373     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1374     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1375     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1376     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1379 
1380   /* load up the local column values */
1381   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1382   cnt  = 0;
1383   for (i=0; i<mat->rmap->n; i++) {
1384     for (j=B->i[i]; j<B->i[i+1]; j++) {
1385       if (garray[B->j[j]] > cstart) break;
1386       column_values[cnt++] = B->a[j];
1387     }
1388     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1389     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1390   }
1391   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1392 
1393   /* store the column values to the file */
1394   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1395   if (!rank) {
1396     MPI_Status status;
1397     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1398     for (i=1; i<size; i++) {
1399       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1400       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1401       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1402       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1403       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1404     }
1405     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1406   } else {
1407     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1408     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1409     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1411   }
1412   ierr = PetscFree(column_values);CHKERRQ(ierr);
1413 
1414   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1415   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1416   PetscFunctionReturn(0);
1417 }
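
/*
   The routine above writes the standard PETSc binary format: a header
   (MAT_FILE_CLASSID, M, N, global nnz) followed by the row lengths, the global column
   indices, and the values, with process 0 gathering the data rank by rank under flow
   control.  A minimal save/load sketch (illustrative only; A and B are assumed to be
   MATMPIAIJ matrices on PETSC_COMM_WORLD):

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/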
1418 
1419 #include <petscdraw.h>
1420 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1421 {
1422   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1423   PetscErrorCode    ierr;
1424   PetscMPIInt       rank = aij->rank,size = aij->size;
1425   PetscBool         isdraw,iascii,isbinary;
1426   PetscViewer       sviewer;
1427   PetscViewerFormat format;
1428 
1429   PetscFunctionBegin;
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1433   if (iascii) {
1434     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1435     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1436       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1437       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1438       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1439       for (i=0; i<(PetscInt)size; i++) {
1440         nmax = PetscMax(nmax,nz[i]);
1441         nmin = PetscMin(nmin,nz[i]);
1442         navg += nz[i];
1443       }
1444       ierr = PetscFree(nz);CHKERRQ(ierr);
1445       navg = navg/size;
1446       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1447       PetscFunctionReturn(0);
1448     }
1449     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1450     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1451       MatInfo   info;
1452       PetscBool inodes;
1453 
1454       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1455       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1456       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1458       if (!inodes) {
1459         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1460                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1461       } else {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1463                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1464       }
1465       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1467       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1468       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1469       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1470       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1472       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1475       PetscInt inodecount,inodelimit,*inodes;
1476       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1477       if (inodes) {
1478         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1479       } else {
1480         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1481       }
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1484       PetscFunctionReturn(0);
1485     }
1486   } else if (isbinary) {
1487     if (size == 1) {
1488       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1489       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1490     } else {
1491       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1492     }
1493     PetscFunctionReturn(0);
1494   } else if (iascii && size == 1) {
1495     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1496     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1497     PetscFunctionReturn(0);
1498   } else if (isdraw) {
1499     PetscDraw draw;
1500     PetscBool isnull;
1501     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1502     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1503     if (isnull) PetscFunctionReturn(0);
1504   }
1505 
1506   { /* assemble the entire matrix onto first processor */
1507     Mat A = NULL, Av;
1508     IS  isrow,iscol;
1509 
1510     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1511     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1512     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1513     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1514 /*  The commented code uses MatCreateSubMatrices instead */
1515 /*
1516     Mat *AA, A = NULL, Av;
1517     IS  isrow,iscol;
1518 
1519     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1520     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1521     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1522     if (!rank) {
1523        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1524        A    = AA[0];
1525        Av   = AA[0];
1526     }
1527     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1528 */
1529     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1530     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1531     /*
1532        Every process has to participate in the call that draws the matrix since the graphics
1533        waits are synchronized across all processes that share the PetscDraw object
1534     */
1535     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1536     if (!rank) {
1537       if (((PetscObject)mat)->name) {
1538         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1539       }
1540       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1541     }
1542     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1543     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1544     ierr = MatDestroy(&A);CHKERRQ(ierr);
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1550 {
1551   PetscErrorCode ierr;
1552   PetscBool      iascii,isdraw,issocket,isbinary;
1553 
1554   PetscFunctionBegin;
1555   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1556   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1557   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1559   if (iascii || isdraw || isbinary || issocket) {
1560     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1566 {
1567   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1568   PetscErrorCode ierr;
1569   Vec            bb1 = 0;
1570   PetscBool      hasop;
1571 
1572   PetscFunctionBegin;
1573   if (flag == SOR_APPLY_UPPER) {
1574     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1575     PetscFunctionReturn(0);
1576   }
1577 
1578   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1579     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1580   }
1581 
1582   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587 
1588     while (its--) {
1589       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591 
1592       /* update rhs: bb1 = bb - B*x */
1593       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1594       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1595 
1596       /* local sweep */
1597       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1598     }
1599   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1600     if (flag & SOR_ZERO_INITIAL_GUESS) {
1601       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602       its--;
1603     }
1604     while (its--) {
1605       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607 
1608       /* update rhs: bb1 = bb - B*x */
1609       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1610       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1611 
1612       /* local sweep */
1613       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1614     }
1615   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1616     if (flag & SOR_ZERO_INITIAL_GUESS) {
1617       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1618       its--;
1619     }
1620     while (its--) {
1621       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1622       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623 
1624       /* update rhs: bb1 = bb - B*x */
1625       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1626       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1627 
1628       /* local sweep */
1629       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1630     }
1631   } else if (flag & SOR_EISENSTAT) {
1632     Vec xx1;
1633 
1634     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1635     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1636 
1637     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1638     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1639     if (!mat->diag) {
1640       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1641       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1642     }
1643     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1644     if (hasop) {
1645       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1646     } else {
1647       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1648     }
1649     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1650 
1651     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1652 
1653     /* local sweep */
1654     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1655     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1656     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1657   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1658 
1659   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1660 
1661   matin->factorerrortype = mat->A->factorerrortype;
1662   PetscFunctionReturn(0);
1663 }
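
/*
   The SOR above sweeps only over the local diagonal block mat->A; between sweeps the
   current iterate is scattered into mat->lvec so the off-diagonal coupling B*x can be
   folded into the right-hand side.  A hedged sketch of a direct call (in practice this
   is usually driven through PCSOR rather than called by users; A, b, x are assumed to
   be an assembled MATMPIAIJ matrix and conforming vectors):

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   The arguments are omega = 1.0, fshift = 0.0, its = 2 outer iterations and lits = 1
   local iteration per sweep.
*/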
1664 
1665 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1666 {
1667   Mat            aA,aB,Aperm;
1668   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1669   PetscScalar    *aa,*ba;
1670   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1671   PetscSF        rowsf,sf;
1672   IS             parcolp = NULL;
1673   PetscBool      done;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1678   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1679   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1680   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1681 
1682   /* Invert row permutation to find out where my rows should go */
1683   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1684   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1685   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1686   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1687   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1688   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1689 
1690   /* Invert column permutation to find out where my columns should go */
1691   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1692   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1693   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1694   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1695   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1696   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1697   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1698 
1699   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1700   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1701   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1702 
1703   /* Find out where my gcols should go */
1704   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1705   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1706   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1707   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1708   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1709   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1710   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1711   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1712 
1713   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1714   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1715   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1716   for (i=0; i<m; i++) {
1717     PetscInt row = rdest[i],rowner;
1718     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1719     for (j=ai[i]; j<ai[i+1]; j++) {
1720       PetscInt cowner,col = cdest[aj[j]];
1721       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1722       if (rowner == cowner) dnnz[i]++;
1723       else onnz[i]++;
1724     }
1725     for (j=bi[i]; j<bi[i+1]; j++) {
1726       PetscInt cowner,col = gcdest[bj[j]];
1727       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1728       if (rowner == cowner) dnnz[i]++;
1729       else onnz[i]++;
1730     }
1731   }
1732   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1733   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1734   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1735   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1737 
1738   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1739   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1740   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1741   for (i=0; i<m; i++) {
1742     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1743     PetscInt j0,rowlen;
1744     rowlen = ai[i+1] - ai[i];
1745     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the buffer length m, so insert the values in batches */
1746       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1747       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1748     }
1749     rowlen = bi[i+1] - bi[i];
1750     for (j0=j=0; j<rowlen; j0=j) {
1751       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1752       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1753     }
1754   }
1755   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1756   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1757   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1758   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1759   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1760   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1761   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1762   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1763   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1764   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1765   *B = Aperm;
1766   PetscFunctionReturn(0);
1767 }
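
/*
   A hedged sketch of reaching the permutation routine above through the public
   MatPermute() interface (illustrative only; nlocal and the array perm, holding the new
   global index of each locally owned row/column, are assumptions of this example):

     IS  rowperm,colperm;
     Mat Aperm;

     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,perm,PETSC_COPY_VALUES,&rowperm);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,perm,PETSC_COPY_VALUES,&colperm);CHKERRQ(ierr);
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowperm);CHKERRQ(ierr);
     ierr = ISDestroy(&colperm);CHKERRQ(ierr);
*/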
1768 
1769 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1770 {
1771   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1772   PetscErrorCode ierr;
1773 
1774   PetscFunctionBegin;
1775   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1776   if (ghosts) *ghosts = aij->garray;
1777   PetscFunctionReturn(0);
1778 }
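
/*
   The ghosts returned above are simply aij->garray, the global column indices of the
   off-diagonal block.  A minimal sketch of inspecting them (illustrative only; rank is
   assumed to hold the caller's MPI rank):

     PetscInt       nghost;
     const PetscInt *ghost;

     ierr = MatGetGhosts(A,&nghost,&ghost);CHKERRQ(ierr);
     ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %D ghost columns\n",rank,nghost);CHKERRQ(ierr);
     ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr);
*/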
1779 
1780 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1781 {
1782   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1783   Mat            A    = mat->A,B = mat->B;
1784   PetscErrorCode ierr;
1785   PetscLogDouble isend[5],irecv[5];
1786 
1787   PetscFunctionBegin;
1788   info->block_size = 1.0;
1789   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1790 
1791   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1792   isend[3] = info->memory;  isend[4] = info->mallocs;
1793 
1794   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1795 
1796   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1797   isend[3] += info->memory;  isend[4] += info->mallocs;
1798   if (flag == MAT_LOCAL) {
1799     info->nz_used      = isend[0];
1800     info->nz_allocated = isend[1];
1801     info->nz_unneeded  = isend[2];
1802     info->memory       = isend[3];
1803     info->mallocs      = isend[4];
1804   } else if (flag == MAT_GLOBAL_MAX) {
1805     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1806 
1807     info->nz_used      = irecv[0];
1808     info->nz_allocated = irecv[1];
1809     info->nz_unneeded  = irecv[2];
1810     info->memory       = irecv[3];
1811     info->mallocs      = irecv[4];
1812   } else if (flag == MAT_GLOBAL_SUM) {
1813     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1814 
1815     info->nz_used      = irecv[0];
1816     info->nz_allocated = irecv[1];
1817     info->nz_unneeded  = irecv[2];
1818     info->memory       = irecv[3];
1819     info->mallocs      = irecv[4];
1820   }
1821   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1822   info->fill_ratio_needed = 0;
1823   info->factor_mallocs    = 0;
1824   PetscFunctionReturn(0);
1825 }
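
/*
   A minimal sketch of querying the counts accumulated above (illustrative only; A is
   assumed to be an assembled MATMPIAIJ matrix).  With MAT_GLOBAL_SUM the per-process
   numbers are reduced with MPI_SUM, with MAT_GLOBAL_MAX with MPI_MAX:

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  nz allocated %g  mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/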
1826 
1827 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1828 {
1829   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1830   PetscErrorCode ierr;
1831 
1832   PetscFunctionBegin;
1833   switch (op) {
1834   case MAT_NEW_NONZERO_LOCATIONS:
1835   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1836   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1837   case MAT_KEEP_NONZERO_PATTERN:
1838   case MAT_NEW_NONZERO_LOCATION_ERR:
1839   case MAT_USE_INODES:
1840   case MAT_IGNORE_ZERO_ENTRIES:
1841     MatCheckPreallocated(A,1);
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_ROW_ORIENTED:
1846     MatCheckPreallocated(A,1);
1847     a->roworiented = flg;
1848 
1849     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1850     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1851     break;
1852   case MAT_NEW_DIAGONALS:
1853   case MAT_SORTED_FULL:
1854     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1855     break;
1856   case MAT_IGNORE_OFF_PROC_ENTRIES:
1857     a->donotstash = flg;
1858     break;
1859   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1860   case MAT_SPD:
1861   case MAT_SYMMETRIC:
1862   case MAT_STRUCTURALLY_SYMMETRIC:
1863   case MAT_HERMITIAN:
1864   case MAT_SYMMETRY_ETERNAL:
1865     break;
1866   case MAT_SUBMAT_SINGLEIS:
1867     A->submat_singleis = flg;
1868     break;
1869   case MAT_STRUCTURE_ONLY:
1870     /* The option is handled directly by MatSetOption() */
1871     break;
1872   default:
1873     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1874   }
1875   PetscFunctionReturn(0);
1876 }
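
/*
   A short sketch of options that reach the switch above (illustrative only; A is assumed
   to be a MATMPIAIJ matrix that has already been preallocated):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);

   The first is recorded in the Mat_MPIAIJ struct (a->donotstash), the second is forwarded
   to both sequential blocks, and the symmetry flag is recorded by MatSetOption() itself.
*/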
1877 
1878 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1879 {
1880   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1881   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1882   PetscErrorCode ierr;
1883   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1884   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1885   PetscInt       *cmap,*idx_p;
1886 
1887   PetscFunctionBegin;
1888   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1889   mat->getrowactive = PETSC_TRUE;
1890 
1891   if (!mat->rowvalues && (idx || v)) {
1892     /*
1893         allocate enough space to hold information from the longest row.
1894     */
1895     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1896     PetscInt   max = 1,tmp;
1897     for (i=0; i<matin->rmap->n; i++) {
1898       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1899       if (max < tmp) max = tmp;
1900     }
1901     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1902   }
1903 
1904   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1905   lrow = row - rstart;
1906 
1907   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1908   if (!v)   {pvA = 0; pvB = 0;}
1909   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1910   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1911   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1912   nztot = nzA + nzB;
1913 
1914   cmap = mat->garray;
1915   if (v  || idx) {
1916     if (nztot) {
1917       /* Sort by increasing column numbers, assuming A and B already sorted */
1918       PetscInt imark = -1;
1919       if (v) {
1920         *v = v_p = mat->rowvalues;
1921         for (i=0; i<nzB; i++) {
1922           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1923           else break;
1924         }
1925         imark = i;
1926         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1927         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1928       }
1929       if (idx) {
1930         *idx = idx_p = mat->rowindices;
1931         if (imark > -1) {
1932           for (i=0; i<imark; i++) {
1933             idx_p[i] = cmap[cworkB[i]];
1934           }
1935         } else {
1936           for (i=0; i<nzB; i++) {
1937             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1938             else break;
1939           }
1940           imark = i;
1941         }
1942         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1943         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1944       }
1945     } else {
1946       if (idx) *idx = 0;
1947       if (v)   *v   = 0;
1948     }
1949   }
1950   *nz  = nztot;
1951   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
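
/*
   A minimal sketch of the access pattern the routine above supports (illustrative only).
   Only locally owned rows may be requested, and each MatGetRow() must be matched by a
   MatRestoreRow() before the next row is fetched:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/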
1955 
1956 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1957 {
1958   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1959 
1960   PetscFunctionBegin;
1961   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1962   aij->getrowactive = PETSC_FALSE;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1967 {
1968   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1969   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1970   PetscErrorCode ierr;
1971   PetscInt       i,j,cstart = mat->cmap->rstart;
1972   PetscReal      sum = 0.0;
1973   MatScalar      *v;
1974 
1975   PetscFunctionBegin;
1976   if (aij->size == 1) {
1977     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1978   } else {
1979     if (type == NORM_FROBENIUS) {
1980       v = amat->a;
1981       for (i=0; i<amat->nz; i++) {
1982         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1983       }
1984       v = bmat->a;
1985       for (i=0; i<bmat->nz; i++) {
1986         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1987       }
1988       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       *norm = PetscSqrtReal(*norm);
1990       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1991     } else if (type == NORM_1) { /* max column norm */
1992       PetscReal *tmp,*tmp2;
1993       PetscInt  *jj,*garray = aij->garray;
1994       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1995       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1996       *norm = 0.0;
1997       v     = amat->a; jj = amat->j;
1998       for (j=0; j<amat->nz; j++) {
1999         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2000       }
2001       v = bmat->a; jj = bmat->j;
2002       for (j=0; j<bmat->nz; j++) {
2003         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2004       }
2005       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2006       for (j=0; j<mat->cmap->N; j++) {
2007         if (tmp2[j] > *norm) *norm = tmp2[j];
2008       }
2009       ierr = PetscFree(tmp);CHKERRQ(ierr);
2010       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2011       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2012     } else if (type == NORM_INFINITY) { /* max row norm */
2013       PetscReal ntemp = 0.0;
2014       for (j=0; j<aij->A->rmap->n; j++) {
2015         v   = amat->a + amat->i[j];
2016         sum = 0.0;
2017         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2018           sum += PetscAbsScalar(*v); v++;
2019         }
2020         v = bmat->a + bmat->i[j];
2021         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2022           sum += PetscAbsScalar(*v); v++;
2023         }
2024         if (sum > ntemp) ntemp = sum;
2025       }
2026       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2027       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2028     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2029   }
2030   PetscFunctionReturn(0);
2031 }
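
/*
   The three norms computed above, written out (NORM_2 is not supported in parallel):

     NORM_FROBENIUS   ||A||_F   = sqrt( sum_ij |a_ij|^2 )    local sums, then MPI_SUM
     NORM_1           ||A||_1   = max_j sum_i |a_ij|         column sums, MPI_SUM, then max
     NORM_INFINITY    ||A||_inf = max_i sum_j |a_ij|         row sums, then MPI_MAX

   A minimal call sketch (A is assumed to be an assembled MATMPIAIJ matrix):

     PetscReal nrm;

     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/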
2032 
2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2034 {
2035   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2036   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2037   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2038   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2039   PetscErrorCode  ierr;
2040   Mat             B,A_diag,*B_diag;
2041   const MatScalar *array;
2042 
2043   PetscFunctionBegin;
2044   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2045   ai = Aloc->i; aj = Aloc->j;
2046   bi = Bloc->i; bj = Bloc->j;
2047   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2048     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2049     PetscSFNode          *oloc;
2050     PETSC_UNUSED PetscSF sf;
2051 
2052     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2053     /* compute d_nnz for preallocation */
2054     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2055     for (i=0; i<ai[ma]; i++) {
2056       d_nnz[aj[i]]++;
2057     }
2058     /* compute local off-diagonal contributions */
2059     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2060     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2061     /* map those to global */
2062     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2063     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2064     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2065     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2066     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2067     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2068     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2069 
2070     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2071     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2072     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2073     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2074     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2075     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2076   } else {
2077     B    = *matout;
2078     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2079   }
2080 
2081   b           = (Mat_MPIAIJ*)B->data;
2082   A_diag      = a->A;
2083   B_diag      = &b->A;
2084   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2085   A_diag_ncol = A_diag->cmap->N;
2086   B_diag_ilen = sub_B_diag->ilen;
2087   B_diag_i    = sub_B_diag->i;
2088 
2089   /* Set ilen for diagonal of B */
2090   for (i=0; i<A_diag_ncol; i++) {
2091     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2092   }
2093 
2094   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2095   very quickly (without using MatSetValues) because all writes are local. */
2096   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2097 
2098   /* copy over the B part */
2099   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2100   array = Bloc->a;
2101   row   = A->rmap->rstart;
2102   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2103   cols_tmp = cols;
2104   for (i=0; i<mb; i++) {
2105     ncol = bi[i+1]-bi[i];
2106     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2107     row++;
2108     array += ncol; cols_tmp += ncol;
2109   }
2110   ierr = PetscFree(cols);CHKERRQ(ierr);
2111 
2112   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2113   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2115     *matout = B;
2116   } else {
2117     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
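
/*
   A hedged sketch of both ways the routine above is reached (illustrative only; A is
   assumed to be an assembled MATMPIAIJ matrix).  MAT_INPLACE_MATRIX takes the
   *matout == A path and finishes with MatHeaderMerge():

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
*/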
2121 
2122 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2123 {
2124   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2125   Mat            a    = aij->A,b = aij->B;
2126   PetscErrorCode ierr;
2127   PetscInt       s1,s2,s3;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2131   if (rr) {
2132     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2133     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2134     /* Overlap communication with computation. */
2135     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2136   }
2137   if (ll) {
2138     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2139     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2140     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2141   }
2142   /* scale the diagonal block */
2143   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2144 
2145   if (rr) {
2146     /* Do a scatter end and then right scale the off-diagonal block */
2147     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2148     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2149   }
2150   PetscFunctionReturn(0);
2151 }
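
/*
   A minimal sketch of A = diag(ll) * A * diag(rr) through the routine above (illustrative
   only; A is assumed to be an assembled MATMPIAIJ matrix).  The left vector must conform
   to the rows and the right vector to the columns, matching the local-size checks above:

     Vec l,r;

     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/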
2152 
2153 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2154 {
2155   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2164 {
2165   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2166   Mat            a,b,c,d;
2167   PetscBool      flg;
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   a = matA->A; b = matA->B;
2172   c = matB->A; d = matB->B;
2173 
2174   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2175   if (flg) {
2176     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2177   }
2178   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2183 {
2184   PetscErrorCode ierr;
2185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2186   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2187 
2188   PetscFunctionBegin;
2189   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2190   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2191     /* because of the column compression in the off-processor part of the matrix a->B,
2192        the number of columns in a->B and b->B may be different, hence we cannot call
2193        the MatCopy() directly on the two parts. If need be, we can provide a more
2194        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2195        then copying the submatrices */
2196     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2197   } else {
2198     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2199     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2200   }
2201   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2206 {
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 /*
2215    Computes the number of nonzeros per row needed for preallocation when X and Y
2216    have different nonzero structure.
2217 */
2218 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2219 {
2220   PetscInt       i,j,k,nzx,nzy;
2221 
2222   PetscFunctionBegin;
2223   /* Set the number of nonzeros in the new matrix */
2224   for (i=0; i<m; i++) {
2225     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2226     nzx = xi[i+1] - xi[i];
2227     nzy = yi[i+1] - yi[i];
2228     nnz[i] = 0;
2229     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2230       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2231       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2232       nnz[i]++;
2233     }
2234     for (; k<nzy; k++) nnz[i]++;
2235   }
2236   PetscFunctionReturn(0);
2237 }
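
/*
   A small worked example of the merge count above, assuming the global column indices of
   each row are sorted (as they are for assembled AIJ matrices):

     row i of X (global columns) : {1, 4, 7}
     row i of Y (global columns) : {0, 4, 9}
     merged pattern              : {0, 1, 4, 7, 9}   so nnz[i] = 5

   The inner k loop advances through Y's columns that are smaller than the current X
   column, and the duplicate column 4 is counted only once.
*/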
2238 
2239 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2240 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2241 {
2242   PetscErrorCode ierr;
2243   PetscInt       m = Y->rmap->N;
2244   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2245   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2246 
2247   PetscFunctionBegin;
2248   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2253 {
2254   PetscErrorCode ierr;
2255   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2256   PetscBLASInt   bnz,one=1;
2257   Mat_SeqAIJ     *x,*y;
2258 
2259   PetscFunctionBegin;
2260   if (str == SAME_NONZERO_PATTERN) {
2261     PetscScalar alpha = a;
2262     x    = (Mat_SeqAIJ*)xx->A->data;
2263     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2264     y    = (Mat_SeqAIJ*)yy->A->data;
2265     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2266     x    = (Mat_SeqAIJ*)xx->B->data;
2267     y    = (Mat_SeqAIJ*)yy->B->data;
2268     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2269     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2270     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2271     /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU
2272        will be updated */
2273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2274     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2275       Y->offloadmask = PETSC_OFFLOAD_CPU;
2276     }
2277 #endif
2278   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2279     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2280   } else {
2281     Mat      B;
2282     PetscInt *nnz_d,*nnz_o;
2283     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2284     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2285     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2286     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2287     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2288     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2289     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2290     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2291     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2292     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2293     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2294     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2295     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2296     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2297   }
2298   PetscFunctionReturn(0);
2299 }
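
/*
   A hedged sketch of Y = Y + a*X for the structure flags handled above (illustrative
   only; X and Y are assumed to be assembled MATMPIAIJ matrices with identical layouts).
   The flag is a promise about the nonzero patterns: SAME_NONZERO_PATTERN uses the BLAS
   axpy path, SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and
   DIFFERENT_NONZERO_PATTERN builds a freshly preallocated matrix and replaces Y's header:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/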
2300 
2301 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2302 
2303 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2304 {
2305 #if defined(PETSC_USE_COMPLEX)
2306   PetscErrorCode ierr;
2307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2311   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2312 #else
2313   PetscFunctionBegin;
2314 #endif
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2319 {
2320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2325   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2330 {
2331   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2336   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->rmap->n; i++) {
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++) {
2365     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2379 {
2380   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2381   PetscErrorCode ierr;
2382   PetscInt       i,*idxb = 0;
2383   PetscScalar    *va,*vb;
2384   Vec            vtmp;
2385 
2386   PetscFunctionBegin;
2387   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2388   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2389   if (idx) {
2390     for (i=0; i<A->rmap->n; i++) {
2391       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2392     }
2393   }
2394 
2395   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2396   if (idx) {
2397     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2398   }
2399   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2400   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2401 
2402   for (i=0; i<A->rmap->n; i++) {
2403     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2404       va[i] = vb[i];
2405       if (idx) idx[i] = a->garray[idxb[i]];
2406     }
2407   }
2408 
2409   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2411   ierr = PetscFree(idxb);CHKERRQ(ierr);
2412   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2417 {
2418   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2419   PetscInt       n      = A->rmap->n;
2420   PetscInt       cstart = A->cmap->rstart;
2421   PetscInt       *cmap  = mat->garray;
2422   PetscInt       *diagIdx, *offdiagIdx;
2423   Vec            diagV, offdiagV;
2424   PetscScalar    *a, *diagA, *offdiagA;
2425   PetscInt       r;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2430   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2432   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2433   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2434   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2435   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2436   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2437   for (r = 0; r < n; ++r) {
2438     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2439       a[r]   = diagA[r];
2440       idx[r] = cstart + diagIdx[r];
2441     } else {
2442       a[r]   = offdiagA[r];
2443       idx[r] = cmap[offdiagIdx[r]];
2444     }
2445   }
2446   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2447   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2449   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2450   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2451   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2456 {
2457   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2458   PetscInt       n      = A->rmap->n;
2459   PetscInt       cstart = A->cmap->rstart;
2460   PetscInt       *cmap  = mat->garray;
2461   PetscInt       *diagIdx, *offdiagIdx;
2462   Vec            diagV, offdiagV;
2463   PetscScalar    *a, *diagA, *offdiagA;
2464   PetscInt       r;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2469   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2470   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2471   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2472   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2473   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2474   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2475   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2476   for (r = 0; r < n; ++r) {
2477     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2478       a[r]   = diagA[r];
2479       idx[r] = cstart + diagIdx[r];
2480     } else {
2481       a[r]   = offdiagA[r];
2482       idx[r] = cmap[offdiagIdx[r]];
2483     }
2484   }
2485   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2486   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2487   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2488   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2489   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2490   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2495 {
2496   PetscErrorCode ierr;
2497   Mat            *dummy;
2498 
2499   PetscFunctionBegin;
2500   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2501   *newmat = *dummy;
2502   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2507 {
2508   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2509   PetscErrorCode ierr;
2510 
2511   PetscFunctionBegin;
2512   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2513   A->factorerrortype = a->A->factorerrortype;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2518 {
2519   PetscErrorCode ierr;
2520   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2521 
2522   PetscFunctionBegin;
2523   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2524   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2525   if (x->assembled) {
2526     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2527   } else {
2528     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2529   }
2530   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2531   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2536 {
2537   PetscFunctionBegin;
2538   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2539   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 /*@
2544    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2545 
2546    Collective on Mat
2547 
2548    Input Parameters:
2549 +    A - the matrix
2550 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is PETSC_FALSE, i.e. the scalable algorithm is not used)
2551 
2552    Level: advanced
2553 
2554 @*/
2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2556 {
2557   PetscErrorCode       ierr;
2558 
2559   PetscFunctionBegin;
2560   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
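/*
   A minimal usage sketch (illustrative, not taken from this file; "is" is assumed to be an existing IS of
   locally owned rows of an assembled MATMPIAIJ matrix A):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,1,&is,2);CHKERRQ(ierr);

   The same behavior can be requested from the options database with -mat_increase_overlap_scalable,
   handled by MatSetFromOptions_MPIAIJ() below.
*/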
2563 
2564 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2565 {
2566   PetscErrorCode       ierr;
2567   PetscBool            sc = PETSC_FALSE,flg;
2568 
2569   PetscFunctionBegin;
2570   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2571   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2572   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2573   if (flg) {
2574     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2575   }
2576   ierr = PetscOptionsTail();CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2581 {
2582   PetscErrorCode ierr;
2583   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2584   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2585 
2586   PetscFunctionBegin;
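  /* Make sure the diagonal block has room for the new diagonal entries before calling MatShift_Basic() below */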
2587   if (!Y->preallocated) {
2588     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2589   } else if (!aij->nz) {
2590     PetscInt nonew = aij->nonew;
2591     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2592     aij->nonew = nonew;
2593   }
2594   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2599 {
2600   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2601   PetscErrorCode ierr;
2602 
2603   PetscFunctionBegin;
2604   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2605   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2606   if (d) {
2607     PetscInt rstart;
2608     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2609     *d += rstart;
2610 
2611   }
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2616 {
2617   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2618   PetscErrorCode ierr;
2619 
2620   PetscFunctionBegin;
2621   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 /* -------------------------------------------------------------------*/
2626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2627                                        MatGetRow_MPIAIJ,
2628                                        MatRestoreRow_MPIAIJ,
2629                                        MatMult_MPIAIJ,
2630                                 /* 4*/ MatMultAdd_MPIAIJ,
2631                                        MatMultTranspose_MPIAIJ,
2632                                        MatMultTransposeAdd_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*10*/ 0,
2637                                        0,
2638                                        0,
2639                                        MatSOR_MPIAIJ,
2640                                        MatTranspose_MPIAIJ,
2641                                 /*15*/ MatGetInfo_MPIAIJ,
2642                                        MatEqual_MPIAIJ,
2643                                        MatGetDiagonal_MPIAIJ,
2644                                        MatDiagonalScale_MPIAIJ,
2645                                        MatNorm_MPIAIJ,
2646                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2647                                        MatAssemblyEnd_MPIAIJ,
2648                                        MatSetOption_MPIAIJ,
2649                                        MatZeroEntries_MPIAIJ,
2650                                 /*24*/ MatZeroRows_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*29*/ MatSetUp_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        MatGetDiagonalBlock_MPIAIJ,
2659                                        0,
2660                                 /*34*/ MatDuplicate_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*39*/ MatAXPY_MPIAIJ,
2666                                        MatCreateSubMatrices_MPIAIJ,
2667                                        MatIncreaseOverlap_MPIAIJ,
2668                                        MatGetValues_MPIAIJ,
2669                                        MatCopy_MPIAIJ,
2670                                 /*44*/ MatGetRowMax_MPIAIJ,
2671                                        MatScale_MPIAIJ,
2672                                        MatShift_MPIAIJ,
2673                                        MatDiagonalSet_MPIAIJ,
2674                                        MatZeroRowsColumns_MPIAIJ,
2675                                 /*49*/ MatSetRandom_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2681                                        0,
2682                                        MatSetUnfactored_MPIAIJ,
2683                                        MatPermute_MPIAIJ,
2684                                        0,
2685                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2686                                        MatDestroy_MPIAIJ,
2687                                        MatView_MPIAIJ,
2688                                        0,
2689                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2690                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2691                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2696                                        MatGetRowMinAbs_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                 /*75*/ MatFDColoringApply_AIJ,
2702                                        MatSetFromOptions_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                        MatFindZeroDiagonals_MPIAIJ,
2706                                 /*80*/ 0,
2707                                        0,
2708                                        0,
2709                                 /*83*/ MatLoad_MPIAIJ,
2710                                        MatIsSymmetric_MPIAIJ,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2716                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2717                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2718                                        MatPtAP_MPIAIJ_MPIAIJ,
2719                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2720                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        MatPinToCPU_MPIAIJ,
2725                                 /*99*/ 0,
2726                                        0,
2727                                        0,
2728                                        MatConjugate_MPIAIJ,
2729                                        0,
2730                                 /*104*/MatSetValuesRow_MPIAIJ,
2731                                        MatRealPart_MPIAIJ,
2732                                        MatImaginaryPart_MPIAIJ,
2733                                        0,
2734                                        0,
2735                                 /*109*/0,
2736                                        0,
2737                                        MatGetRowMin_MPIAIJ,
2738                                        0,
2739                                        MatMissingDiagonal_MPIAIJ,
2740                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2741                                        0,
2742                                        MatGetGhosts_MPIAIJ,
2743                                        0,
2744                                        0,
2745                                 /*119*/0,
2746                                        0,
2747                                        0,
2748                                        0,
2749                                        MatGetMultiProcBlock_MPIAIJ,
2750                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2751                                        MatGetColumnNorms_MPIAIJ,
2752                                        MatInvertBlockDiagonal_MPIAIJ,
2753                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2754                                        MatCreateSubMatricesMPI_MPIAIJ,
2755                                 /*129*/0,
2756                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2757                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2758                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2759                                        0,
2760                                 /*134*/0,
2761                                        0,
2762                                        MatRARt_MPIAIJ_MPIAIJ,
2763                                        0,
2764                                        0,
2765                                 /*139*/MatSetBlockSizes_MPIAIJ,
2766                                        0,
2767                                        0,
2768                                        MatFDColoringSetUp_MPIXAIJ,
2769                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2770                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2771 };
2772 
2773 /* ----------------------------------------------------------------------------------------*/
2774 
2775 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2776 {
2777   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2778   PetscErrorCode ierr;
2779 
2780   PetscFunctionBegin;
2781   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2782   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2787 {
2788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2793   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2798 {
2799   Mat_MPIAIJ     *b;
2800   PetscErrorCode ierr;
2801   PetscMPIInt    size;
2802 
2803   PetscFunctionBegin;
2804   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2806   b = (Mat_MPIAIJ*)B->data;
2807 
2808 #if defined(PETSC_USE_CTABLE)
2809   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2810 #else
2811   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2812 #endif
2813   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2814   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2815   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2816 
2817   /* Because B may have been resized, we simply destroy it and create a new one each time */
2818   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2819   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2820   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2821   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2822   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2823   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2824   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2825 
2826   if (!B->preallocated) {
2827     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2828     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2829     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2830     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2831     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2832   }
2833 
2834   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2835   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2836   B->preallocated  = PETSC_TRUE;
2837   B->was_assembled = PETSC_FALSE;
2838   B->assembled     = PETSC_FALSE;
2839   PetscFunctionReturn(0);
2840 }
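/*
   A minimal usage sketch of the public interface implemented above (illustrative only; the sizes are
   assumptions): preallocate about 5 nonzeros per row in the diagonal block and 2 per row in the
   off-diagonal block of an M-by-N parallel matrix.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/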
2841 
2842 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2843 {
2844   Mat_MPIAIJ     *b;
2845   PetscErrorCode ierr;
2846 
2847   PetscFunctionBegin;
2848   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2849   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2850   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2851   b = (Mat_MPIAIJ*)B->data;
2852 
2853 #if defined(PETSC_USE_CTABLE)
2854   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2855 #else
2856   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2857 #endif
2858   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2859   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2860   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2861 
2862   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2863   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2864   B->preallocated  = PETSC_TRUE;
2865   B->was_assembled = PETSC_FALSE;
2866   B->assembled = PETSC_FALSE;
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2871 {
2872   Mat            mat;
2873   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2874   PetscErrorCode ierr;
2875 
2876   PetscFunctionBegin;
2877   *newmat = 0;
2878   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2879   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2880   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2881   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2882   a       = (Mat_MPIAIJ*)mat->data;
2883 
2884   mat->factortype   = matin->factortype;
2885   mat->assembled    = PETSC_TRUE;
2886   mat->insertmode   = NOT_SET_VALUES;
2887   mat->preallocated = PETSC_TRUE;
2888 
2889   a->size         = oldmat->size;
2890   a->rank         = oldmat->rank;
2891   a->donotstash   = oldmat->donotstash;
2892   a->roworiented  = oldmat->roworiented;
2893   a->rowindices   = 0;
2894   a->rowvalues    = 0;
2895   a->getrowactive = PETSC_FALSE;
2896 
2897   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2898   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2899 
2900   if (oldmat->colmap) {
2901 #if defined(PETSC_USE_CTABLE)
2902     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2903 #else
2904     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2905     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2906     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2907 #endif
2908   } else a->colmap = 0;
2909   if (oldmat->garray) {
2910     PetscInt len;
2911     len  = oldmat->B->cmap->n;
2912     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2913     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2914     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2915   } else a->garray = 0;
2916 
2917   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2918      in fact, MatDuplicate() only requires the matrix to be preallocated.
2919      This may happen inside a DMCreateMatrix_Shell */
2920   if (oldmat->lvec) {
2921     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2922     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2923   }
2924   if (oldmat->Mvctx) {
2925     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2926     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2927   }
2928   if (oldmat->Mvctx_mpi1) {
2929     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2930     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2931   }
2932 
2933   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2934   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2935   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2936   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2937   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2938   *newmat = mat;
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2943 {
2944   PetscBool      isbinary, ishdf5;
2945   PetscErrorCode ierr;
2946 
2947   PetscFunctionBegin;
2948   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2949   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2950   /* force binary viewer to load .info file if it has not yet done so */
2951   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2953   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2954   if (isbinary) {
2955     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2956   } else if (ishdf5) {
2957 #if defined(PETSC_HAVE_HDF5)
2958     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2959 #else
2960     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2961 #endif
2962   } else {
2963     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2964   }
2965   PetscFunctionReturn(0);
2966 }
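/*
   A minimal loading sketch (illustrative only; "matrix.dat" is a placeholder file name):

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/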
2967 
2968 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2969 {
2970   PetscScalar    *vals,*svals;
2971   MPI_Comm       comm;
2972   PetscErrorCode ierr;
2973   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2974   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2975   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2976   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2977   PetscInt       cend,cstart,n,*rowners;
2978   int            fd;
2979   PetscInt       bs = newMat->rmap->bs;
2980 
2981   PetscFunctionBegin;
2982   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2983   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2984   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2985   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2986   if (!rank) {
2987     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2988     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2989     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2990   }
2991 
2992   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2993   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2994   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2995   if (bs < 0) bs = 1;
2996 
2997   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2998   M    = header[1]; N = header[2];
2999 
3000   /* If global sizes are set, check if they are consistent with that given in the file */
3001   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
3002   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
3003 
3004   /* determine ownership of all (block) rows */
3005   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3006   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3007   else m = newMat->rmap->n; /* Set by user */
3008 
3009   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3010   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3011 
3012   /* First process needs enough room for process with most rows */
3013   if (!rank) {
3014     mmax = rowners[1];
3015     for (i=2; i<=size; i++) {
3016       mmax = PetscMax(mmax, rowners[i]);
3017     }
3018   } else mmax = -1;             /* unused, but compilers complain */
3019 
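  /* convert the gathered per-process row counts into a prefix sum; rowners[rank] .. rowners[rank+1] then delimits each process's row range */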
3020   rowners[0] = 0;
3021   for (i=2; i<=size; i++) {
3022     rowners[i] += rowners[i-1];
3023   }
3024   rstart = rowners[rank];
3025   rend   = rowners[rank+1];
3026 
3027   /* distribute row lengths to all processors */
3028   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3029   if (!rank) {
3030     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3031     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3032     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3033     for (j=0; j<m; j++) {
3034       procsnz[0] += ourlens[j];
3035     }
3036     for (i=1; i<size; i++) {
3037       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3038       /* calculate the number of nonzeros on each processor */
3039       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3040         procsnz[i] += rowlengths[j];
3041       }
3042       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3043     }
3044     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3045   } else {
3046     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3047   }
3048 
3049   if (!rank) {
3050     /* determine max buffer needed and allocate it */
3051     maxnz = 0;
3052     for (i=0; i<size; i++) {
3053       maxnz = PetscMax(maxnz,procsnz[i]);
3054     }
3055     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3056 
3057     /* read in my part of the matrix column indices  */
3058     nz   = procsnz[0];
3059     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3060     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3061 
3062     /* read in everyone else's and ship off */
3063     for (i=1; i<size; i++) {
3064       nz   = procsnz[i];
3065       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3066       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3067     }
3068     ierr = PetscFree(cols);CHKERRQ(ierr);
3069   } else {
3070     /* determine buffer space needed for message */
3071     nz = 0;
3072     for (i=0; i<m; i++) {
3073       nz += ourlens[i];
3074     }
3075     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3076 
3077     /* receive message of column indices */
3078     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3079   }
3080 
3081   /* determine column ownership if matrix is not square */
3082   if (N != M) {
3083     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3084     else n = newMat->cmap->n;
3085     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3086     cstart = cend - n;
3087   } else {
3088     cstart = rstart;
3089     cend   = rend;
3090     n      = cend - cstart;
3091   }
3092 
3093   /* loop over local rows, determining number of off-diagonal entries */
3094   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3095   jj   = 0;
3096   for (i=0; i<m; i++) {
3097     for (j=0; j<ourlens[i]; j++) {
3098       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3099       jj++;
3100     }
3101   }
3102 
3103   for (i=0; i<m; i++) {
3104     ourlens[i] -= offlens[i];
3105   }
3106   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3107 
3108   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3109 
3110   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3111 
3112   for (i=0; i<m; i++) {
3113     ourlens[i] += offlens[i];
3114   }
3115 
3116   if (!rank) {
3117     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3118 
3119     /* read in my part of the matrix numerical values  */
3120     nz   = procsnz[0];
3121     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3122 
3123     /* insert into matrix */
3124     jj      = rstart;
3125     smycols = mycols;
3126     svals   = vals;
3127     for (i=0; i<m; i++) {
3128       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3129       smycols += ourlens[i];
3130       svals   += ourlens[i];
3131       jj++;
3132     }
3133 
3134     /* read in the other processes' parts and ship them out */
3135     for (i=1; i<size; i++) {
3136       nz   = procsnz[i];
3137       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3138       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3139     }
3140     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3141   } else {
3142     /* receive numeric values */
3143     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3144 
3145     /* receive message of values */
3146     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3147 
3148     /* insert into matrix */
3149     jj      = rstart;
3150     smycols = mycols;
3151     svals   = vals;
3152     for (i=0; i<m; i++) {
3153       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3154       smycols += ourlens[i];
3155       svals   += ourlens[i];
3156       jj++;
3157     }
3158   }
3159   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3160   ierr = PetscFree(vals);CHKERRQ(ierr);
3161   ierr = PetscFree(mycols);CHKERRQ(ierr);
3162   ierr = PetscFree(rowners);CHKERRQ(ierr);
3163   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   PetscFunctionReturn(0);
3166 }
3167 
3168 /* Not scalable because of ISAllGather() unless getting all columns. */
3169 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3170 {
3171   PetscErrorCode ierr;
3172   IS             iscol_local;
3173   PetscBool      isstride;
3174   PetscMPIInt    lisstride=0,gisstride;
3175 
3176   PetscFunctionBegin;
3177   /* check if we are grabbing all columns */
3178   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3179 
3180   if (isstride) {
3181     PetscInt  start,len,mstart,mlen;
3182     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3183     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3184     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3185     if (mstart == start && mlen-mstart == len) lisstride = 1;
3186   }
3187 
3188   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3189   if (gisstride) {
3190     PetscInt N;
3191     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3192     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3193     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3194     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3195   } else {
3196     PetscInt cbs;
3197     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3198     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3199     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3200   }
3201 
3202   *isseq = iscol_local;
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 /*
3207  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3208  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3209 
3210  Input Parameters:
3211    mat - matrix
3212    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3213            i.e., mat->rstart <= isrow[i] < mat->rend
3214    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3215            i.e., mat->cstart <= iscol[i] < mat->cend
3216  Output Parameters:
3217    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3218    iscol_o - sequential column index set for retrieving mat->B
3219    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3220  */
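/*
   Illustrative example (hypothetical numbers): if iscol globally selects columns {2,5,11} of mat and
   column 11 is off-process on this rank, stored as local column 4 of mat->B (i.e. a->garray[4] == 11),
   then iscol_o = {4} and the returned garray = {2}, since column 11 is the third (zero-based index 2)
   entry of the concatenated iscol and hence column 2 of the submatrix.
*/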
3221 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3222 {
3223   PetscErrorCode ierr;
3224   Vec            x,cmap;
3225   const PetscInt *is_idx;
3226   PetscScalar    *xarray,*cmaparray;
3227   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3228   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3229   Mat            B=a->B;
3230   Vec            lvec=a->lvec,lcmap;
3231   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3232   MPI_Comm       comm;
3233   VecScatter     Mvctx=a->Mvctx;
3234 
3235   PetscFunctionBegin;
3236   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3237   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3238 
3239   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3240   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3241   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3242   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3243   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3244 
3245   /* Get start indices */
3246   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3247   isstart -= ncols;
3248   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3249 
3250   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3251   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3252   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3253   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3254   for (i=0; i<ncols; i++) {
3255     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3256     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3257     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3258   }
3259   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3260   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3261   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3262 
3263   /* Get iscol_d */
3264   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3265   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3266   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3267 
3268   /* Get isrow_d */
3269   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3270   rstart = mat->rmap->rstart;
3271   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3272   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3273   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3274   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3275 
3276   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3277   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3278   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3279 
3280   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3281   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3282   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3283 
3284   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3285 
3286   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3287   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3288 
3289   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3290   /* off-process column indices */
3291   count = 0;
3292   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3293   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3294 
3295   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3296   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3297   for (i=0; i<Bn; i++) {
3298     if (PetscRealPart(xarray[i]) > -1.0) {
3299       idx[count]     = i;                   /* local column index in off-diagonal part B */
3300       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3301       count++;
3302     }
3303   }
3304   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3305   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3306 
3307   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3308   /* cannot ensure iscol_o has same blocksize as iscol! */
3309 
3310   ierr = PetscFree(idx);CHKERRQ(ierr);
3311   *garray = cmap1;
3312 
3313   ierr = VecDestroy(&x);CHKERRQ(ierr);
3314   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3315   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3316   PetscFunctionReturn(0);
3317 }
3318 
3319 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3320 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3321 {
3322   PetscErrorCode ierr;
3323   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3324   Mat            M = NULL;
3325   MPI_Comm       comm;
3326   IS             iscol_d,isrow_d,iscol_o;
3327   Mat            Asub = NULL,Bsub = NULL;
3328   PetscInt       n;
3329 
3330   PetscFunctionBegin;
3331   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3332 
3333   if (call == MAT_REUSE_MATRIX) {
3334     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3335     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3336     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3337 
3338     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3339     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3340 
3341     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3342     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3343 
3344     /* Update diagonal and off-diagonal portions of submat */
3345     asub = (Mat_MPIAIJ*)(*submat)->data;
3346     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3347     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3348     if (n) {
3349       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3350     }
3351     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3352     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3353 
3354   } else { /* call == MAT_INITIAL_MATRIX */
3355     const PetscInt *garray;
3356     PetscInt        BsubN;
3357 
3358     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3359     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3360 
3361     /* Create local submatrices Asub and Bsub */
3362     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3363     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3364 
3365     /* Create submatrix M */
3366     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3367 
3368     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3369     asub = (Mat_MPIAIJ*)M->data;
3370 
3371     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3372     n = asub->B->cmap->N;
3373     if (BsubN > n) {
3374       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3375       const PetscInt *idx;
3376       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3377       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3378 
3379       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3380       j = 0;
3381       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3382       for (i=0; i<n; i++) {
3383         if (j >= BsubN) break;
3384         while (subgarray[i] > garray[j]) j++;
3385 
3386         if (subgarray[i] == garray[j]) {
3387           idx_new[i] = idx[j++];
3388         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3389       }
3390       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3391 
3392       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3393       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3394 
3395     } else if (BsubN < n) {
3396       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3397     }
3398 
3399     ierr = PetscFree(garray);CHKERRQ(ierr);
3400     *submat = M;
3401 
3402     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3403     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3404     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3405 
3406     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3407     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3408 
3409     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3410     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3411   }
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3416 {
3417   PetscErrorCode ierr;
3418   IS             iscol_local=NULL,isrow_d;
3419   PetscInt       csize;
3420   PetscInt       n,i,j,start,end;
3421   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3422   MPI_Comm       comm;
3423 
3424   PetscFunctionBegin;
3425   /* If isrow has same processor distribution as mat,
3426      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3427   if (call == MAT_REUSE_MATRIX) {
3428     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3429     if (isrow_d) {
3430       sameRowDist  = PETSC_TRUE;
3431       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3432     } else {
3433       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3434       if (iscol_local) {
3435         sameRowDist  = PETSC_TRUE;
3436         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3437       }
3438     }
3439   } else {
3440     /* Check if isrow has same processor distribution as mat */
3441     sameDist[0] = PETSC_FALSE;
3442     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3443     if (!n) {
3444       sameDist[0] = PETSC_TRUE;
3445     } else {
3446       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3447       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3448       if (i >= start && j < end) {
3449         sameDist[0] = PETSC_TRUE;
3450       }
3451     }
3452 
3453     /* Check if iscol has same processor distribution as mat */
3454     sameDist[1] = PETSC_FALSE;
3455     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3456     if (!n) {
3457       sameDist[1] = PETSC_TRUE;
3458     } else {
3459       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3460       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3461       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3462     }
3463 
3464     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3465     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3466     sameRowDist = tsameDist[0];
3467   }
3468 
3469   if (sameRowDist) {
3470     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3471       /* isrow and iscol have same processor distribution as mat */
3472       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3473       PetscFunctionReturn(0);
3474     } else { /* sameRowDist */
3475       /* isrow has same processor distribution as mat */
3476       if (call == MAT_INITIAL_MATRIX) {
3477         PetscBool sorted;
3478         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3479         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3480         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3481         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3482 
3483         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3484         if (sorted) {
3485           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3486           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3487           PetscFunctionReturn(0);
3488         }
3489       } else { /* call == MAT_REUSE_MATRIX */
3490         IS    iscol_sub;
3491         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3492         if (iscol_sub) {
3493           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3494           PetscFunctionReturn(0);
3495         }
3496       }
3497     }
3498   }
3499 
3500   /* General case: iscol -> iscol_local which has global size of iscol */
3501   if (call == MAT_REUSE_MATRIX) {
3502     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3503     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3504   } else {
3505     if (!iscol_local) {
3506       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3507     }
3508   }
3509 
3510   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3511   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3512 
3513   if (call == MAT_INITIAL_MATRIX) {
3514     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3515     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3516   }
3517   PetscFunctionReturn(0);
3518 }
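/*
   A minimal caller-side sketch (illustrative; isrow and iscol are assumed to be existing index sets):

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... change numerical values of A, keeping its nonzero pattern ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
     ierr = MatDestroy(&S);CHKERRQ(ierr);
*/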
3519 
3520 /*@C
3521      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3522          and "off-diagonal" parts of the matrix in CSR format.
3523 
3524    Collective
3525 
3526    Input Parameters:
3527 +  comm - MPI communicator
3528 .  A - "diagonal" portion of matrix
3529 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3530 -  garray - global index of B columns
3531 
3532    Output Parameter:
3533 .   mat - the matrix, with input A as its local diagonal matrix
3534    Level: advanced
3535 
3536    Notes:
3537        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3538        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
3539 
3540 .seealso: MatCreateMPIAIJWithSplitArrays()
3541 @*/
3542 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3543 {
3544   PetscErrorCode ierr;
3545   Mat_MPIAIJ     *maij;
3546   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3547   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3548   PetscScalar    *oa=b->a;
3549   Mat            Bnew;
3550   PetscInt       m,n,N;
3551 
3552   PetscFunctionBegin;
3553   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3554   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3555   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3556   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3557   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3558   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3559 
3560   /* Get global columns of mat */
3561   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3562 
3563   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3564   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3565   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3566   maij = (Mat_MPIAIJ*)(*mat)->data;
3567 
3568   (*mat)->preallocated = PETSC_TRUE;
3569 
3570   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3571   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3572 
3573   /* Set A as diagonal portion of *mat */
3574   maij->A = A;
3575 
3576   nz = oi[m];
3577   for (i=0; i<nz; i++) {
3578     col   = oj[i];
3579     oj[i] = garray[col];
3580   }
3581 
3582    /* Set Bnew as off-diagonal portion of *mat */
3583   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3584   bnew        = (Mat_SeqAIJ*)Bnew->data;
3585   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3586   maij->B     = Bnew;
3587 
3588   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3589 
3590   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3591   b->free_a       = PETSC_FALSE;
3592   b->free_ij      = PETSC_FALSE;
3593   ierr = MatDestroy(&B);CHKERRQ(ierr);
3594 
3595   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3596   bnew->free_a       = PETSC_TRUE;
3597   bnew->free_ij      = PETSC_TRUE;
3598 
3599   /* condense columns of maij->B */
3600   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3601   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3603   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3604   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3605   PetscFunctionReturn(0);
3606 }
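/*
   A minimal usage sketch (illustrative, not taken from this file): Adiag and Boffd are process-local
   MATSEQAIJ matrices holding this process's "diagonal" and "off-diagonal" blocks, and garray[] maps each
   column of Boffd to its global column index in the parallel matrix. Both Adiag and Boffd are consumed
   by the call and must not be used afterwards.

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adiag,Boffd,garray,&M);CHKERRQ(ierr);
*/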
3607 
3608 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3609 
3610 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3611 {
3612   PetscErrorCode ierr;
3613   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3614   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3615   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3616   Mat            M,Msub,B=a->B;
3617   MatScalar      *aa;
3618   Mat_SeqAIJ     *aij;
3619   PetscInt       *garray = a->garray,*colsub,Ncols;
3620   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3621   IS             iscol_sub,iscmap;
3622   const PetscInt *is_idx,*cmap;
3623   PetscBool      allcolumns=PETSC_FALSE;
3624   MPI_Comm       comm;
3625 
3626   PetscFunctionBegin;
3627   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3628 
3629   if (call == MAT_REUSE_MATRIX) {
3630     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3631     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3632     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3633 
3634     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3635     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3636 
3637     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3638     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3639 
3640     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3641 
3642   } else { /* call == MAT_INITIAL_MATRIX */
3643     PetscBool flg;
3644 
3645     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3646     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3647 
3648     /* (1) iscol -> nonscalable iscol_local */
3649     /* Check for special case: each processor gets entire matrix columns */
3650     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3651     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3652     if (allcolumns) {
3653       iscol_sub = iscol_local;
3654       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3655       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3656 
3657     } else {
3658       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it can have duplicate indices */
3659       PetscInt *idx,*cmap1,k;
3660       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3661       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3662       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3663       count = 0;
3664       k     = 0;
3665       for (i=0; i<Ncols; i++) {
3666         j = is_idx[i];
3667         if (j >= cstart && j < cend) {
3668           /* diagonal part of mat */
3669           idx[count]     = j;
3670           cmap1[count++] = i; /* column index in submat */
3671         } else if (Bn) {
3672           /* off-diagonal part of mat */
3673           if (j == garray[k]) {
3674             idx[count]     = j;
3675             cmap1[count++] = i;  /* column index in submat */
3676           } else if (j > garray[k]) {
3677             while (j > garray[k] && k < Bn-1) k++;
3678             if (j == garray[k]) {
3679               idx[count]     = j;
3680               cmap1[count++] = i; /* column index in submat */
3681             }
3682           }
3683         }
3684       }
3685       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3686 
3687       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3688       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3689       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3690 
3691       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3692     }
3693 
3694     /* (3) Create sequential Msub */
3695     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3696   }
3697 
3698   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3699   aij  = (Mat_SeqAIJ*)(Msub)->data;
3700   ii   = aij->i;
3701   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3702 
3703   /*
3704       m - number of local rows
3705       Ncols - number of columns (same on all processors)
3706       rstart - first row in new global matrix generated
3707   */
3708   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3709 
3710   if (call == MAT_INITIAL_MATRIX) {
3711     /* (4) Create parallel newmat */
3712     PetscMPIInt    rank,size;
3713     PetscInt       csize;
3714 
3715     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3716     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3717 
3718     /*
3719         Determine the number of non-zeros in the diagonal and off-diagonal
3720         portions of the matrix in order to do correct preallocation
3721     */
3722 
3723     /* first get start and end of "diagonal" columns */
3724     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3725     if (csize == PETSC_DECIDE) {
3726       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3727       if (mglobal == Ncols) { /* square matrix */
3728         nlocal = m;
3729       } else {
3730         nlocal = Ncols/size + ((Ncols % size) > rank);
3731       }
3732     } else {
3733       nlocal = csize;
3734     }
3735     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3736     rstart = rend - nlocal;
3737     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3738 
3739     /* next, compute all the lengths */
3740     jj    = aij->j;
3741     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3742     olens = dlens + m;
3743     for (i=0; i<m; i++) {
3744       jend = ii[i+1] - ii[i];
3745       olen = 0;
3746       dlen = 0;
3747       for (j=0; j<jend; j++) {
3748         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3749         else dlen++;
3750         jj++;
3751       }
3752       olens[i] = olen;
3753       dlens[i] = dlen;
3754     }
3755 
3756     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3757     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3758 
3759     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3760     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3761     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3762     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3763     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3764     ierr = PetscFree(dlens);CHKERRQ(ierr);
3765 
3766   } else { /* call == MAT_REUSE_MATRIX */
3767     M    = *newmat;
3768     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3769     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3770     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3771     /*
3772          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3773        rather than the slower MatSetValues().
3774     */
3775     M->was_assembled = PETSC_TRUE;
3776     M->assembled     = PETSC_FALSE;
3777   }
3778 
3779   /* (5) Set values of Msub to *newmat */
3780   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3781   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3782 
3783   jj   = aij->j;
3784   aa   = aij->a;
3785   for (i=0; i<m; i++) {
3786     row = rstart + i;
3787     nz  = ii[i+1] - ii[i];
3788     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3789     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3790     jj += nz; aa += nz;
3791   }
3792   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3793 
3794   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3795   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3796 
3797   ierr = PetscFree(colsub);CHKERRQ(ierr);
3798 
3799   /* save Msub, iscol_sub and iscmap used in processor for next request */
3800   if (call ==  MAT_INITIAL_MATRIX) {
3801     *newmat = M;
3802     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3803     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3804 
3805     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3806     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3807 
3808     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3809     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3810 
3811     if (iscol_local) {
3812       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3813       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3814     }
3815   }
3816   PetscFunctionReturn(0);
3817 }
3818 
3819 /*
3820     Not great since it makes two copies of the submatrix: first a SeqAIJ
3821   on each process, and then the end result by concatenating the local matrices.
3822   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3823 
3824   Note: This requires a sequential iscol with all indices.
3825 */
3826 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3827 {
3828   PetscErrorCode ierr;
3829   PetscMPIInt    rank,size;
3830   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3831   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3832   Mat            M,Mreuse;
3833   MatScalar      *aa,*vwork;
3834   MPI_Comm       comm;
3835   Mat_SeqAIJ     *aij;
3836   PetscBool      colflag,allcolumns=PETSC_FALSE;
3837 
3838   PetscFunctionBegin;
3839   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3840   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3841   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3842 
3843   /* Check for special case: each processor gets entire matrix columns */
3844   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3845   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3846   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3847 
3848   if (call ==  MAT_REUSE_MATRIX) {
3849     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3850     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3851     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3852   } else {
3853     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3854   }
3855 
3856   /*
3857       m - number of local rows
3858       n - number of columns (same on all processors)
3859       rstart - first row in new global matrix generated
3860   */
3861   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3862   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3863   if (call == MAT_INITIAL_MATRIX) {
3864     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3865     ii  = aij->i;
3866     jj  = aij->j;
3867 
3868     /*
3869         Determine the number of non-zeros in the diagonal and off-diagonal
3870         portions of the matrix in order to do correct preallocation
3871     */
3872 
3873     /* first get start and end of "diagonal" columns */
3874     if (csize == PETSC_DECIDE) {
3875       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3876       if (mglobal == n) { /* square matrix */
3877         nlocal = m;
3878       } else {
3879         nlocal = n/size + ((n % size) > rank);
3880       }
3881     } else {
3882       nlocal = csize;
3883     }
3884     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3885     rstart = rend - nlocal;
3886     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3887 
3888     /* next, compute all the lengths */
3889     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3890     olens = dlens + m;
3891     for (i=0; i<m; i++) {
3892       jend = ii[i+1] - ii[i];
3893       olen = 0;
3894       dlen = 0;
3895       for (j=0; j<jend; j++) {
3896         if (*jj < rstart || *jj >= rend) olen++;
3897         else dlen++;
3898         jj++;
3899       }
3900       olens[i] = olen;
3901       dlens[i] = dlen;
3902     }
3903     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3904     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3905     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3906     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3907     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3908     ierr = PetscFree(dlens);CHKERRQ(ierr);
3909   } else {
3910     PetscInt ml,nl;
3911 
3912     M    = *newmat;
3913     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3914     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3915     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3916     /*
3917          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3918        rather than the slower MatSetValues().
3919     */
3920     M->was_assembled = PETSC_TRUE;
3921     M->assembled     = PETSC_FALSE;
3922   }
3923   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3924   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3925   ii   = aij->i;
3926   jj   = aij->j;
3927   aa   = aij->a;
3928   for (i=0; i<m; i++) {
3929     row   = rstart + i;
3930     nz    = ii[i+1] - ii[i];
3931     cwork = jj;     jj += nz;
3932     vwork = aa;     aa += nz;
3933     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3934   }
3935 
3936   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3938   *newmat = M;
3939 
3940   /* save submatrix used in processor for next request */
3941   if (call ==  MAT_INITIAL_MATRIX) {
3942     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3943     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3944   }
3945   PetscFunctionReturn(0);
3946 }
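
/*
   Editor's note (a sketch, not part of the original source): the routines above are the
   MPIAIJ back ends of the public MatCreateSubMatrix() interface. A typical caller-side
   use, assuming A, isrow, iscol and ierr are the caller's own variables with the index
   sets created on the communicator of A, might look like:

     Mat sub = NULL;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...  use sub; later refresh it with the same layout  ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/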
3947 
3948 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3949 {
3950   PetscInt       m,cstart, cend,j,nnz,i,d;
3951   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3952   const PetscInt *JJ;
3953   PetscErrorCode ierr;
3954   PetscBool      nooffprocentries;
3955 
3956   PetscFunctionBegin;
3957   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3958 
3959   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3960   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3961   m      = B->rmap->n;
3962   cstart = B->cmap->rstart;
3963   cend   = B->cmap->rend;
3964   rstart = B->rmap->rstart;
3965 
3966   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3967 
3968 #if defined(PETSC_USE_DEBUG)
3969   for (i=0; i<m; i++) {
3970     nnz = Ii[i+1]- Ii[i];
3971     JJ  = J + Ii[i];
3972     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3973     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3974     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3975   }
3976 #endif
3977 
3978   for (i=0; i<m; i++) {
3979     nnz     = Ii[i+1]- Ii[i];
3980     JJ      = J + Ii[i];
3981     nnz_max = PetscMax(nnz_max,nnz);
3982     d       = 0;
3983     for (j=0; j<nnz; j++) {
3984       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3985     }
3986     d_nnz[i] = d;
3987     o_nnz[i] = nnz - d;
3988   }
3989   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3990   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3991 
3992   for (i=0; i<m; i++) {
3993     ii   = i + rstart;
3994     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3995   }
3996   nooffprocentries    = B->nooffprocentries;
3997   B->nooffprocentries = PETSC_TRUE;
3998   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3999   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4000   B->nooffprocentries = nooffprocentries;
4001 
4002   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4003   PetscFunctionReturn(0);
4004 }
4005 
4006 /*@
4007    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4008    (the default parallel PETSc format).
4009 
4010    Collective
4011 
4012    Input Parameters:
4013 +  B - the matrix
4014 .  i - the indices into j for the start of each local row (starts with zero)
4015 .  j - the column indices for each local row (starts with zero)
4016 -  v - optional values in the matrix
4017 
4018    Level: developer
4019 
4020    Notes:
4021        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4022      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4023      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4024 
4025        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4026 
4027        The format used for the sparse matrix input is equivalent to a
4028     row-major ordering, i.e., for the following matrix the input data expected is
4029     as shown:
4030 
4031 $        1 0 0
4032 $        2 0 3     P0
4033 $       -------
4034 $        4 5 6     P1
4035 $
4036 $     Process0 [P0]: rows_owned=[0,1]
4037 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4038 $        j =  {0,0,2}  [size = 3]
4039 $        v =  {1,2,3}  [size = 3]
4040 $
4041 $     Process1 [P1]: rows_owned=[2]
4042 $        i =  {0,3}    [size = nrow+1  = 1+1]
4043 $        j =  {0,1,2}  [size = 3]
4044 $        v =  {4,5,6}  [size = 3]
4045 
4046 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4047           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4048 @*/
4049 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4050 {
4051   PetscErrorCode ierr;
4052 
4053   PetscFunctionBegin;
4054   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4055   PetscFunctionReturn(0);
4056 }
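
/*
   Editor's note (a sketch, not part of the original source): the typical calling pattern
   for MatMPIAIJSetPreallocationCSR() on a hand-created matrix; comm, m, N, i, j, v and
   ierr are the caller's own variables, with i, j, v holding the local rows in CSR form.

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/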
4057 
4058 /*@C
4059    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4060    (the default parallel PETSc format).  For good matrix assembly performance
4061    the user should preallocate the matrix storage by setting the parameters
4062    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4063    performance can be increased by more than a factor of 50.
4064 
4065    Collective
4066 
4067    Input Parameters:
4068 +  B - the matrix
4069 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4070            (same value is used for all local rows)
4071 .  d_nnz - array containing the number of nonzeros in the various rows of the
4072            DIAGONAL portion of the local submatrix (possibly different for each row)
4073            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4074            The size of this array is equal to the number of local rows, i.e 'm'.
4075            For matrices that will be factored, you must leave room for (and set)
4076            the diagonal entry even if it is zero.
4077 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4078            submatrix (same value is used for all local rows).
4079 -  o_nnz - array containing the number of nonzeros in the various rows of the
4080            OFF-DIAGONAL portion of the local submatrix (possibly different for
4081            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4082            structure. The size of this array is equal to the number
4083            of local rows, i.e 'm'.
4084 
4085    If the *_nnz parameter is given then the *_nz parameter is ignored
4086 
4087    The AIJ format (also called the Yale sparse matrix format or
4088    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4089    storage.  The stored row and column indices begin with zero.
4090    See Users-Manual: ch_mat for details.
4091 
4092    The parallel matrix is partitioned such that the first m0 rows belong to
4093    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4094    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4095 
4096    The DIAGONAL portion of the local submatrix of a processor can be defined
4097    as the submatrix which is obtained by extracting the part corresponding to
4098    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4099    first row that belongs to the processor, r2 is the last row belonging to
4100    this processor, and c1-c2 is the range of indices of the local part of a
4101    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4102    common case of a square matrix, the row and column ranges are the same and
4103    the DIAGONAL part is also square. The remaining portion of the local
4104    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4105 
4106    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4107 
4108    You can call MatGetInfo() to get information on how effective the preallocation was;
4109    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4110    You can also run with the option -info and look for messages with the string
4111    malloc in them to see if additional memory allocation was needed.
4112 
4113    Example usage:
4114 
4115    Consider the following 8x8 matrix with 34 non-zero values, that is
4116    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4117    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4118    as follows:
4119 
4120 .vb
4121             1  2  0  |  0  3  0  |  0  4
4122     Proc0   0  5  6  |  7  0  0  |  8  0
4123             9  0 10  | 11  0  0  | 12  0
4124     -------------------------------------
4125            13  0 14  | 15 16 17  |  0  0
4126     Proc1   0 18  0  | 19 20 21  |  0  0
4127             0  0  0  | 22 23  0  | 24  0
4128     -------------------------------------
4129     Proc2  25 26 27  |  0  0 28  | 29  0
4130            30  0  0  | 31 32 33  |  0 34
4131 .ve
4132 
4133    This can be represented as a collection of submatrices as:
4134 
4135 .vb
4136       A B C
4137       D E F
4138       G H I
4139 .ve
4140 
4141    Where the submatrices A,B,C are owned by proc0, D,E,F are
4142    owned by proc1, G,H,I are owned by proc2.
4143 
4144    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4145    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4146    The 'M','N' parameters are 8,8, and have the same values on all procs.
4147 
4148    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4149    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4150    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4151    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4152    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4153    matrix, and [DF] as another SeqAIJ matrix.
4154 
4155    When d_nz, o_nz parameters are specified, d_nz storage elements are
4156    allocated for every row of the local diagonal submatrix, and o_nz
4157    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4158    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4159    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4160    In this case, the values of d_nz,o_nz are:
4161 .vb
4162      proc0 : dnz = 2, o_nz = 2
4163      proc1 : dnz = 3, o_nz = 2
4164      proc2 : dnz = 1, o_nz = 4
4165 .ve
4166    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4167    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4168    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4169    34 values.
4170 
4171    When d_nnz, o_nnz parameters are specified, the storage is specified
4172    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4173    In the above case the values for d_nnz,o_nnz are:
4174 .vb
4175      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4176      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4177      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4178 .ve
4179    Here the space allocated is sum of all the above values i.e 34, and
4180    hence pre-allocation is perfect.
4181 
4182    Level: intermediate
4183 
4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4185           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4186 @*/
4187 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4188 {
4189   PetscErrorCode ierr;
4190 
4191   PetscFunctionBegin;
4192   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4193   PetscValidType(B,1);
4194   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4195   PetscFunctionReturn(0);
4196 }
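
/*
   Editor's note (a sketch, not part of the original source): how the proc0 row counts in
   the 8x8 example above translate into a MatMPIAIJSetPreallocation() call; B and ierr are
   the caller's variables, and B is assumed to be an MPIAIJ matrix whose three local rows
   are the proc0 rows of that example (d_nnz counts the diagonal block, o_nnz the
   off-diagonal block).

     const PetscInt d_nnz[] = {2,2,2};
     const PetscInt o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/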
4197 
4198 /*@
4199      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
4200          CSR format, the local rows.
4201 
4202    Collective
4203 
4204    Input Parameters:
4205 +  comm - MPI communicator
4206 .  m - number of local rows (Cannot be PETSC_DECIDE)
4207 .  n - This value should be the same as the local size used in creating the
4208        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4209        calculated if N is given) For square matrices n is almost always m.
4210 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4211 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4212 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4213 .   j - column indices
4214 -   a - matrix values
4215 
4216    Output Parameter:
4217 .   mat - the matrix
4218 
4219    Level: intermediate
4220 
4221    Notes:
4222        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4223      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4224      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4225 
4226        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4227 
4228        The format used for the sparse matrix input is equivalent to a
4229     row-major ordering, i.e., for the following matrix the input data expected is
4230     as shown below.
4231 
4232        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4233 
4234 $        1 0 0
4235 $        2 0 3     P0
4236 $       -------
4237 $        4 5 6     P1
4238 $
4239 $     Process0 [P0]: rows_owned=[0,1]
4240 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4241 $        j =  {0,0,2}  [size = 3]
4242 $        v =  {1,2,3}  [size = 3]
4243 $
4244 $     Process1 [P1]: rows_owned=[2]
4245 $        i =  {0,3}    [size = nrow+1  = 1+1]
4246 $        j =  {0,1,2}  [size = 3]
4247 $        v =  {4,5,6}  [size = 3]
4248 
4249 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4250           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4251 @*/
4252 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4253 {
4254   PetscErrorCode ierr;
4255 
4256   PetscFunctionBegin;
4257   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4258   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4259   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4260   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4261   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4262   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4263   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
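
/*
   Editor's note (a sketch, not part of the original source): building the 3x3 example
   matrix from the manual page above with MatCreateMPIAIJWithArrays(), assuming the run
   uses exactly two ranks, with rank 0 owning rows 0-1 and rank 1 owning row 2; comm, rank
   and ierr are the caller's variables.

     const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     const PetscScalar a0[] = {1.0,2.0,3.0};
     const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     const PetscScalar a1[] = {4.0,5.0,6.0};
     Mat               A;

     if (!rank) {
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,a1,&A);CHKERRQ(ierr);
     }
*/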
4266 
4267 /*@
4268      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain, in standard
4269          CSR format, the local rows. Only the numerical values are updated; the other arrays must be identical
4270 
4271    Collective
4272 
4273    Input Parameters:
4274 +  mat - the matrix
4275 .  m - number of local rows (Cannot be PETSC_DECIDE)
4276 .  n - This value should be the same as the local size used in creating the
4277        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4278        calculated if N is given) For square matrices n is almost always m.
4279 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4280 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4281 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4282 .  J - column indices
4283 -  v - matrix values
4284 
4285    Level: intermediate
4286 
4287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4288           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4289 @*/
4290 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4291 {
4292   PetscErrorCode ierr;
4293   PetscInt       cstart,nnz,i,j;
4294   PetscInt       *ld;
4295   PetscBool      nooffprocentries;
4296   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4297   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4298   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4299   const PetscInt *Adi = Ad->i;
4300   PetscInt       ldi,Iii,md;
4301 
4302   PetscFunctionBegin;
4303   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4304   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4305   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4306   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4307 
4308   cstart = mat->cmap->rstart;
4309   if (!Aij->ld) {
4310     /* count number of entries below block diagonal */
4311     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4312     Aij->ld = ld;
4313     for (i=0; i<m; i++) {
4314       nnz  = Ii[i+1]- Ii[i];
4315       j     = 0;
4316       while (j < nnz && J[j] < cstart) {j++;}
4317       J    += nnz;
4318       ld[i] = j;
4319     }
4320   } else {
4321     ld = Aij->ld;
4322   }
4323 
4324   for (i=0; i<m; i++) {
4325     nnz  = Ii[i+1]- Ii[i];
4326     Iii  = Ii[i];
4327     ldi  = ld[i];
4328     md   = Adi[i+1]-Adi[i];
4329     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4330     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4331     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4332     ad  += md;
4333     ao  += nnz - md;
4334   }
4335   nooffprocentries      = mat->nooffprocentries;
4336   mat->nooffprocentries = PETSC_TRUE;
4337   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4338   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4339   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4342   mat->nooffprocentries = nooffprocentries;
4343   PetscFunctionReturn(0);
4344 }
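
/*
   Editor's note (a sketch, not part of the original source): refreshing only the numerical
   values of a matrix previously built with MatCreateMPIAIJWithArrays(); mat, m, n, M, N,
   i, j and ierr are the caller's variables from the original creation call, and newv is a
   value array laid out exactly like the original one.

     ierr = MatUpdateMPIAIJWithArrays(mat,m,n,M,N,i,j,newv);CHKERRQ(ierr);
*/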
4345 
4346 /*@C
4347    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4348    (the default parallel PETSc format).  For good matrix assembly performance
4349    the user should preallocate the matrix storage by setting the parameters
4350    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4351    performance can be increased by more than a factor of 50.
4352 
4353    Collective
4354 
4355    Input Parameters:
4356 +  comm - MPI communicator
4357 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4358            This value should be the same as the local size used in creating the
4359            y vector for the matrix-vector product y = Ax.
4360 .  n - This value should be the same as the local size used in creating the
4361        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4362        calculated if N is given) For square matrices n is almost always m.
4363 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4364 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4365 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4366            (same value is used for all local rows)
4367 .  d_nnz - array containing the number of nonzeros in the various rows of the
4368            DIAGONAL portion of the local submatrix (possibly different for each row)
4369            or NULL, if d_nz is used to specify the nonzero structure.
4370            The size of this array is equal to the number of local rows, i.e 'm'.
4371 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4372            submatrix (same value is used for all local rows).
4373 -  o_nnz - array containing the number of nonzeros in the various rows of the
4374            OFF-DIAGONAL portion of the local submatrix (possibly different for
4375            each row) or NULL, if o_nz is used to specify the nonzero
4376            structure. The size of this array is equal to the number
4377            of local rows, i.e 'm'.
4378 
4379    Output Parameter:
4380 .  A - the matrix
4381 
4382    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4383    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4384    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4385 
4386    Notes:
4387    If the *_nnz parameter is given then the *_nz parameter is ignored
4388 
4389    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4390    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4391    storage requirements for this matrix.
4392 
4393    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4394    processor then it must be used on all processors that share the object for
4395    that argument.
4396 
4397    The user MUST specify either the local or global matrix dimensions
4398    (possibly both).
4399 
4400    The parallel matrix is partitioned across processors such that the
4401    first m0 rows belong to process 0, the next m1 rows belong to
4402    process 1, the next m2 rows belong to process 2, etc., where
4403    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4404    values corresponding to an [m x N] submatrix.
4405 
4406    The columns are logically partitioned with the n0 columns belonging
4407    to the 0th partition, the next n1 columns belonging to the next
4408    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4409 
4410    The DIAGONAL portion of the local submatrix on any given processor
4411    is the submatrix corresponding to the rows and columns m,n
4412    owned by that processor, i.e., the diagonal matrix on
4413    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4414    etc. The remaining portion of the local submatrix [m x (N-n)]
4415    constitutes the OFF-DIAGONAL portion. The example below better
4416    illustrates this concept.
4417 
4418    For a square global matrix we define each processor's diagonal portion
4419    to be its local rows and the corresponding columns (a square submatrix);
4420    each processor's off-diagonal portion encompasses the remainder of the
4421    local matrix (a rectangular submatrix).
4422 
4423    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4424 
4425    When calling this routine with a single process communicator, a matrix of
4426    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4427    type of communicator, use the construction mechanism
4428 .vb
4429      MatCreate(...,&A);
4430      MatSetType(A,MATMPIAIJ);
4431      MatSetSizes(A, m,n,M,N);
4432      MatMPIAIJSetPreallocation(A,...);
4433 .ve
4434 
4437    By default, this format uses inodes (identical nodes) when possible.
4438    We search for consecutive rows with the same nonzero structure, thereby
4439    reusing matrix information to achieve increased efficiency.
4440 
4441    Options Database Keys:
4442 +  -mat_no_inode  - Do not use inodes
4443 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4444 
4447    Example usage:
4448 
4449    Consider the following 8x8 matrix with 34 non-zero values, that is
4450    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4451    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4452    as follows
4453 
4454 .vb
4455             1  2  0  |  0  3  0  |  0  4
4456     Proc0   0  5  6  |  7  0  0  |  8  0
4457             9  0 10  | 11  0  0  | 12  0
4458     -------------------------------------
4459            13  0 14  | 15 16 17  |  0  0
4460     Proc1   0 18  0  | 19 20 21  |  0  0
4461             0  0  0  | 22 23  0  | 24  0
4462     -------------------------------------
4463     Proc2  25 26 27  |  0  0 28  | 29  0
4464            30  0  0  | 31 32 33  |  0 34
4465 .ve
4466 
4467    This can be represented as a collection of submatrices as
4468 
4469 .vb
4470       A B C
4471       D E F
4472       G H I
4473 .ve
4474 
4475    Where the submatrices A,B,C are owned by proc0, D,E,F are
4476    owned by proc1, G,H,I are owned by proc2.
4477 
4478    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4480    The 'M','N' parameters are 8,8, and have the same values on all procs.
4481 
4482    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4483    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4484    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4485    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4486    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4487    matrix, and [DF] as another SeqAIJ matrix.
4488 
4489    When d_nz, o_nz parameters are specified, d_nz storage elements are
4490    allocated for every row of the local diagonal submatrix, and o_nz
4491    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4492    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4493    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4494    In this case, the values of d_nz,o_nz are
4495 .vb
4496      proc0 : dnz = 2, o_nz = 2
4497      proc1 : dnz = 3, o_nz = 2
4498      proc2 : dnz = 1, o_nz = 4
4499 .ve
4500    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4501    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4502    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4503    34 values.
4504 
4505    When d_nnz, o_nnz parameters are specified, the storage is specified
4506    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4507    In the above case the values for d_nnz,o_nnz are
4508 .vb
4509      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4510      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4511      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4512 .ve
4513    Here the space allocated is sum of all the above values i.e 34, and
4514    hence pre-allocation is perfect.
4515 
4516    Level: intermediate
4517 
4518 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4519           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4520 @*/
4521 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4522 {
4523   PetscErrorCode ierr;
4524   PetscMPIInt    size;
4525 
4526   PetscFunctionBegin;
4527   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4528   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4530   if (size > 1) {
4531     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4532     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4533   } else {
4534     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4535     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4536   }
4537   PetscFunctionReturn(0);
4538 }
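
/*
   Editor's note (a sketch, not part of the original source): creating a square matrix with
   MatCreateAIJ() from rough per-row estimates rather than exact d_nnz/o_nnz arrays; comm,
   m and ierr are the caller's variables and the 5/2 estimates are illustrative only.

     Mat A;
     ierr = MatCreateAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);CHKERRQ(ierr);
     ...  set entries with MatSetValues(), then  ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/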
4539 
4540 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4541 {
4542   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4543   PetscBool      flg;
4544   PetscErrorCode ierr;
4545 
4546   PetscFunctionBegin;
4547   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4548   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4549   if (Ad)     *Ad     = a->A;
4550   if (Ao)     *Ao     = a->B;
4551   if (colmap) *colmap = a->garray;
4552   PetscFunctionReturn(0);
4553 }
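
/*
   Editor's note (a sketch, not part of the original source): querying the two sequential
   blocks an MPIAIJ matrix A is stored as; Ad is the diagonal block, Ao the off-diagonal
   block, and colmap maps Ao's local column indices to global column indices.

     Mat            Ad,Ao;
     const PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/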
4554 
4555 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4556 {
4557   PetscErrorCode ierr;
4558   PetscInt       m,N,i,rstart,nnz,Ii;
4559   PetscInt       *indx;
4560   PetscScalar    *values;
4561 
4562   PetscFunctionBegin;
4563   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4564   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4565     PetscInt       *dnz,*onz,sum,bs,cbs;
4566 
4567     if (n == PETSC_DECIDE) {
4568       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4569     }
4570     /* Check sum(n) = N */
4571     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4572     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4573 
4574     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4575     rstart -= m;
4576 
4577     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4578     for (i=0; i<m; i++) {
4579       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4580       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4581       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4582     }
4583 
4584     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4585     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4586     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4587     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4588     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4589     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4590     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4591     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4592   }
4593 
4594   /* numeric phase */
4595   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4596   for (i=0; i<m; i++) {
4597     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4598     Ii   = i + rstart;
4599     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4600     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4601   }
4602   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4603   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4604   PetscFunctionReturn(0);
4605 }
4606 
4607 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4608 {
4609   PetscErrorCode    ierr;
4610   PetscMPIInt       rank;
4611   PetscInt          m,N,i,rstart,nnz;
4612   size_t            len;
4613   const PetscInt    *indx;
4614   PetscViewer       out;
4615   char              *name;
4616   Mat               B;
4617   const PetscScalar *values;
4618 
4619   PetscFunctionBegin;
4620   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4621   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4622   /* Should this be the type of the diagonal block of A? */
4623   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4624   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4625   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4626   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4627   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4628   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4629   for (i=0; i<m; i++) {
4630     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4631     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4632     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4633   }
4634   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4635   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4636 
4637   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4638   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4639   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4640   sprintf(name,"%s.%d",outfile,rank);
4641   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4642   ierr = PetscFree(name);CHKERRQ(ierr);
4643   ierr = MatView(B,out);CHKERRQ(ierr);
4644   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4645   ierr = MatDestroy(&B);CHKERRQ(ierr);
4646   PetscFunctionReturn(0);
4647 }
4648 
4649 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4650 {
4651   PetscErrorCode      ierr;
4652   Mat_Merge_SeqsToMPI *merge;
4653   PetscContainer      container;
4654 
4655   PetscFunctionBegin;
4656   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4657   if (container) {
4658     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4666     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4667     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4669     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4670     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4671     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4672     ierr = PetscFree(merge);CHKERRQ(ierr);
4673     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4674   }
4675   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4676   PetscFunctionReturn(0);
4677 }
4678 
4679 #include <../src/mat/utils/freespace.h>
4680 #include <petscbt.h>
4681 
4682 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4683 {
4684   PetscErrorCode      ierr;
4685   MPI_Comm            comm;
4686   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4687   PetscMPIInt         size,rank,taga,*len_s;
4688   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4689   PetscInt            proc,m;
4690   PetscInt            **buf_ri,**buf_rj;
4691   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4692   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4693   MPI_Request         *s_waits,*r_waits;
4694   MPI_Status          *status;
4695   MatScalar           *aa=a->a;
4696   MatScalar           **abuf_r,*ba_i;
4697   Mat_Merge_SeqsToMPI *merge;
4698   PetscContainer      container;
4699 
4700   PetscFunctionBegin;
4701   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4702   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4703 
4704   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4705   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4706 
4707   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4708   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4709 
4710   bi     = merge->bi;
4711   bj     = merge->bj;
4712   buf_ri = merge->buf_ri;
4713   buf_rj = merge->buf_rj;
4714 
4715   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4716   owners = merge->rowmap->range;
4717   len_s  = merge->len_s;
4718 
4719   /* send and recv matrix values */
4720   /*-----------------------------*/
4721   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4722   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4723 
4724   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4725   for (proc=0,k=0; proc<size; proc++) {
4726     if (!len_s[proc]) continue;
4727     i    = owners[proc];
4728     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4729     k++;
4730   }
4731 
4732   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4733   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4734   ierr = PetscFree(status);CHKERRQ(ierr);
4735 
4736   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4737   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4738 
4739   /* insert mat values of mpimat */
4740   /*----------------------------*/
4741   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4742   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4743 
4744   for (k=0; k<merge->nrecv; k++) {
4745     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4746     nrows       = *(buf_ri_k[k]);
4747     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4748     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4749   }
4750 
4751   /* set values of ba */
4752   m = merge->rowmap->n;
4753   for (i=0; i<m; i++) {
4754     arow = owners[rank] + i;
4755     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4756     bnzi = bi[i+1] - bi[i];
4757     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4758 
4759     /* add local non-zero vals of this proc's seqmat into ba */
4760     anzi   = ai[arow+1] - ai[arow];
4761     aj     = a->j + ai[arow];
4762     aa     = a->a + ai[arow];
4763     nextaj = 0;
4764     for (j=0; nextaj<anzi; j++) {
4765       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4766         ba_i[j] += aa[nextaj++];
4767       }
4768     }
4769 
4770     /* add received vals into ba */
4771     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4772       /* i-th row */
4773       if (i == *nextrow[k]) {
4774         anzi   = *(nextai[k]+1) - *nextai[k];
4775         aj     = buf_rj[k] + *(nextai[k]);
4776         aa     = abuf_r[k] + *(nextai[k]);
4777         nextaj = 0;
4778         for (j=0; nextaj<anzi; j++) {
4779           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4780             ba_i[j] += aa[nextaj++];
4781           }
4782         }
4783         nextrow[k]++; nextai[k]++;
4784       }
4785     }
4786     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4787   }
4788   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4789   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4790 
4791   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4792   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4793   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4794   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4795   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4796   PetscFunctionReturn(0);
4797 }
4798 
4799 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4800 {
4801   PetscErrorCode      ierr;
4802   Mat                 B_mpi;
4803   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4804   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4805   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4806   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4807   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4808   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4809   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4810   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4811   MPI_Status          *status;
4812   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4813   PetscBT             lnkbt;
4814   Mat_Merge_SeqsToMPI *merge;
4815   PetscContainer      container;
4816 
4817   PetscFunctionBegin;
4818   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4819 
4820   /* make sure it is a PETSc comm */
4821   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4823   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4824 
4825   ierr = PetscNew(&merge);CHKERRQ(ierr);
4826   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4827 
4828   /* determine row ownership */
4829   /*---------------------------------------------------------*/
4830   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4831   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4832   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4833   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4834   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4835   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4836   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4837 
4838   m      = merge->rowmap->n;
4839   owners = merge->rowmap->range;
4840 
4841   /* determine the number of messages to send, their lengths */
4842   /*---------------------------------------------------------*/
4843   len_s = merge->len_s;
4844 
4845   len          = 0; /* length of buf_si[] */
4846   merge->nsend = 0;
4847   for (proc=0; proc<size; proc++) {
4848     len_si[proc] = 0;
4849     if (proc == rank) {
4850       len_s[proc] = 0;
4851     } else {
4852       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4853       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4854     }
4855     if (len_s[proc]) {
4856       merge->nsend++;
4857       nrows = 0;
4858       for (i=owners[proc]; i<owners[proc+1]; i++) {
4859         if (ai[i+1] > ai[i]) nrows++;
4860       }
4861       len_si[proc] = 2*(nrows+1);
4862       len         += len_si[proc];
4863     }
4864   }
4865 
4866   /* determine the number and length of messages to receive for ij-structure */
4867   /*-------------------------------------------------------------------------*/
4868   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4869   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4870 
4871   /* post the Irecv of j-structure */
4872   /*-------------------------------*/
4873   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4874   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4875 
4876   /* post the Isend of j-structure */
4877   /*--------------------------------*/
4878   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4879 
4880   for (proc=0, k=0; proc<size; proc++) {
4881     if (!len_s[proc]) continue;
4882     i    = owners[proc];
4883     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4884     k++;
4885   }
4886 
4887   /* receives and sends of j-structure are complete */
4888   /*------------------------------------------------*/
4889   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4890   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4891 
4892   /* send and recv i-structure */
4893   /*---------------------------*/
4894   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4895   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4896 
4897   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4898   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4899   for (proc=0,k=0; proc<size; proc++) {
4900     if (!len_s[proc]) continue;
4901     /* form outgoing message for i-structure:
4902          buf_si[0]:                 nrows to be sent
4903                [1:nrows]:           row index (global)
4904                [nrows+1:2*nrows+1]: i-structure index
4905     */
4906     /*-------------------------------------------*/
4907     nrows       = len_si[proc]/2 - 1;
4908     buf_si_i    = buf_si + nrows+1;
4909     buf_si[0]   = nrows;
4910     buf_si_i[0] = 0;
4911     nrows       = 0;
4912     for (i=owners[proc]; i<owners[proc+1]; i++) {
4913       anzi = ai[i+1] - ai[i];
4914       if (anzi) {
4915         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4916         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4917         nrows++;
4918       }
4919     }
4920     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4921     k++;
4922     buf_si += len_si[proc];
4923   }
4924 
4925   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4926   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4927 
4928   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4929   for (i=0; i<merge->nrecv; i++) {
4930     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4931   }
4932 
4933   ierr = PetscFree(len_si);CHKERRQ(ierr);
4934   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4935   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4936   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4937   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4938   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4939   ierr = PetscFree(status);CHKERRQ(ierr);
4940 
4941   /* compute a local seq matrix in each processor */
4942   /*----------------------------------------------*/
4943   /* allocate bi array and free space for accumulating nonzero column info */
4944   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4945   bi[0] = 0;
4946 
4947   /* create and initialize a linked list */
4948   nlnk = N+1;
4949   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4950 
4951   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4952   len  = ai[owners[rank+1]] - ai[owners[rank]];
4953   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4954 
4955   current_space = free_space;
4956 
4957   /* determine symbolic info for each local row */
4958   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4959 
4960   for (k=0; k<merge->nrecv; k++) {
4961     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4962     nrows       = *buf_ri_k[k];
4963     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4964     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4965   }
4966 
4967   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4968   len  = 0;
4969   for (i=0; i<m; i++) {
4970     bnzi = 0;
4971     /* add local non-zero cols of this proc's seqmat into lnk */
4972     arow  = owners[rank] + i;
4973     anzi  = ai[arow+1] - ai[arow];
4974     aj    = a->j + ai[arow];
4975     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4976     bnzi += nlnk;
4977     /* add received col data into lnk */
4978     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4979       if (i == *nextrow[k]) { /* i-th row */
4980         anzi  = *(nextai[k]+1) - *nextai[k];
4981         aj    = buf_rj[k] + *nextai[k];
4982         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4983         bnzi += nlnk;
4984         nextrow[k]++; nextai[k]++;
4985       }
4986     }
4987     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4988 
4989     /* if free space is not available, make more free space */
4990     if (current_space->local_remaining<bnzi) {
4991       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4992       nspacedouble++;
4993     }
4994     /* copy data into free space, then initialize lnk */
4995     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4996     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4997 
4998     current_space->array           += bnzi;
4999     current_space->local_used      += bnzi;
5000     current_space->local_remaining -= bnzi;
5001 
5002     bi[i+1] = bi[i] + bnzi;
5003   }
5004 
5005   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5006 
5007   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5008   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5009   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5010 
5011   /* create symbolic parallel matrix B_mpi */
5012   /*---------------------------------------*/
5013   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5014   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5015   if (n==PETSC_DECIDE) {
5016     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5017   } else {
5018     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5019   }
5020   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5021   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5022   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5023   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5024   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5025 
5026   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5027   B_mpi->assembled    = PETSC_FALSE;
5028   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5029   merge->bi           = bi;
5030   merge->bj           = bj;
5031   merge->buf_ri       = buf_ri;
5032   merge->buf_rj       = buf_rj;
5033   merge->coi          = NULL;
5034   merge->coj          = NULL;
5035   merge->owners_co    = NULL;
5036 
5037   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5038 
5039   /* attach the supporting struct to B_mpi for reuse */
5040   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5041   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5042   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5043   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5044   *mpimat = B_mpi;
5045 
5046   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5047   PetscFunctionReturn(0);
5048 }
5049 
5050 /*@C
5051       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5052                  matrices from each processor
5053 
5054     Collective
5055 
5056    Input Parameters:
5057 +    comm - the communicator the parallel matrix will live on
5058 .    seqmat - the input sequential matrix on each process
5059 .    m - number of local rows (or PETSC_DECIDE)
5060 .    n - number of local columns (or PETSC_DECIDE)
5061 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5062 
5063    Output Parameter:
5064 .    mpimat - the parallel matrix generated
5065 
5066     Level: advanced
5067 
5068    Notes:
5069      The dimensions of the sequential matrix on each process MUST be the same.
5070      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5071      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
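
   Example usage (an illustrative sketch, not taken from an existing PETSc example; it assumes each process
   has already assembled a SeqAIJ matrix seqmat of identical dimensions and that ierr is a PetscErrorCode):

      Mat mpimat;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);

   The second call refreshes the values of mpimat after the numerical values of seqmat have changed while its
   nonzero pattern stays the same; it reuses the symbolic information stored in the attached container.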
5072 @*/
5073 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5074 {
5075   PetscErrorCode ierr;
5076   PetscMPIInt    size;
5077 
5078   PetscFunctionBegin;
5079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5080   if (size == 1) {
5081     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5082     if (scall == MAT_INITIAL_MATRIX) {
5083       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5084     } else {
5085       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5086     }
5087     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5088     PetscFunctionReturn(0);
5089   }
5090   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5091   if (scall == MAT_INITIAL_MATRIX) {
5092     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5093   }
5094   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5095   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5096   PetscFunctionReturn(0);
5097 }
5098 
5099 /*@
5100      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5101           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5102           with MatGetSize().
5103 
5104     Not Collective
5105 
5106    Input Parameters:
5107 +    A - the matrix
5108 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5109 
5110    Output Parameter:
5111 .    A_loc - the local sequential matrix generated
5112 
5113     Level: developer
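
   Example usage (an illustrative sketch, not taken from an existing PETSc example; A is assumed to be an
   assembled MATMPIAIJ matrix and ierr a PetscErrorCode):

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call updates the values of A_loc in place after the numerical values of A have
   changed while its nonzero pattern stays the same.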
5114 
5115 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5116 
5117 @*/
5118 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5119 {
5120   PetscErrorCode ierr;
5121   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5122   Mat_SeqAIJ     *mat,*a,*b;
5123   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5124   MatScalar      *aa,*ba,*cam;
5125   PetscScalar    *ca;
5126   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5127   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5128   PetscBool      match;
5129   MPI_Comm       comm;
5130   PetscMPIInt    size;
5131 
5132   PetscFunctionBegin;
5133   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5134   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5135   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5136   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5137   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5138 
5139   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5140   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5141   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5142   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5143   aa = a->a; ba = b->a;
5144   if (scall == MAT_INITIAL_MATRIX) {
5145     if (size == 1) {
5146       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5147       PetscFunctionReturn(0);
5148     }
5149 
5150     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5151     ci[0] = 0;
5152     for (i=0; i<am; i++) {
5153       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5154     }
5155     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5156     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5157     k    = 0;
5158     for (i=0; i<am; i++) {
5159       ncols_o = bi[i+1] - bi[i];
5160       ncols_d = ai[i+1] - ai[i];
5161       /* off-diagonal portion of A */
5162       for (jo=0; jo<ncols_o; jo++) {
5163         col = cmap[*bj];
5164         if (col >= cstart) break;
5165         cj[k]   = col; bj++;
5166         ca[k++] = *ba++;
5167       }
5168       /* diagonal portion of A */
5169       for (j=0; j<ncols_d; j++) {
5170         cj[k]   = cstart + *aj++;
5171         ca[k++] = *aa++;
5172       }
5173       /* off-diagonal portion of A */
5174       for (j=jo; j<ncols_o; j++) {
5175         cj[k]   = cmap[*bj++];
5176         ca[k++] = *ba++;
5177       }
5178     }
5179     /* put together the new matrix */
5180     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5181     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5182     /* Since these are PETSc arrays, change flags to free them as necessary. */
5183     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5184     mat->free_a  = PETSC_TRUE;
5185     mat->free_ij = PETSC_TRUE;
5186     mat->nonew   = 0;
5187   } else if (scall == MAT_REUSE_MATRIX) {
5188     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5189     ci = mat->i; cj = mat->j; cam = mat->a;
5190     for (i=0; i<am; i++) {
5191       /* off-diagonal portion of A */
5192       ncols_o = bi[i+1] - bi[i];
5193       for (jo=0; jo<ncols_o; jo++) {
5194         col = cmap[*bj];
5195         if (col >= cstart) break;
5196         *cam++ = *ba++; bj++;
5197       }
5198       /* diagonal portion of A */
5199       ncols_d = ai[i+1] - ai[i];
5200       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5201       /* off-diagonal portion of A */
5202       for (j=jo; j<ncols_o; j++) {
5203         *cam++ = *ba++; bj++;
5204       }
5205     }
5206   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5207   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5208   PetscFunctionReturn(0);
5209 }
5210 
5211 /*@C
5212      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5213 
5214     Not Collective
5215 
5216    Input Parameters:
5217 +    A - the matrix
5218 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5219 -    row, col - index sets of rows and columns to extract (or NULL)
5220 
5221    Output Parameter:
5222 .    A_loc - the local sequential matrix generated
5223 
5224     Level: developer
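
   Example usage (an illustrative sketch, not taken from an existing PETSc example; A is assumed to be an
   assembled MATMPIAIJ matrix and ierr a PetscErrorCode; passing NULL for row and col takes all local rows
   and all nonzero columns):

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);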
5225 
5226 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5227 
5228 @*/
5229 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5230 {
5231   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5232   PetscErrorCode ierr;
5233   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5234   IS             isrowa,iscola;
5235   Mat            *aloc;
5236   PetscBool      match;
5237 
5238   PetscFunctionBegin;
5239   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5240   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5241   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5242   if (!row) {
5243     start = A->rmap->rstart; end = A->rmap->rend;
5244     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5245   } else {
5246     isrowa = *row;
5247   }
5248   if (!col) {
5249     start = A->cmap->rstart;
5250     cmap  = a->garray;
5251     nzA   = a->A->cmap->n;
5252     nzB   = a->B->cmap->n;
5253     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5254     ncols = 0;
5255     for (i=0; i<nzB; i++) {
5256       if (cmap[i] < start) idx[ncols++] = cmap[i];
5257       else break;
5258     }
5259     imark = i;
5260     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5261     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5262     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5263   } else {
5264     iscola = *col;
5265   }
5266   if (scall != MAT_INITIAL_MATRIX) {
5267     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5268     aloc[0] = *A_loc;
5269   }
5270   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5271   if (!col) { /* attach global id of condensed columns */
5272     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5273   }
5274   *A_loc = aloc[0];
5275   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5276   if (!row) {
5277     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5278   }
5279   if (!col) {
5280     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5281   }
5282   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5283   PetscFunctionReturn(0);
5284 }
5285 
5286 /*
5287  * Destroy a mat that may be composed with PetscSF communication objects.
5288  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5289  * */
5290 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5291 {
5292   PetscSF          sf,osf;
5293   IS               map;
5294   PetscErrorCode   ierr;
5295 
5296   PetscFunctionBegin;
5297   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5298   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5299   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5300   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5301   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5302   ierr = ISDestroy(&map);CHKERRQ(ierr);
5303   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5304   PetscFunctionReturn(0);
5305 }
5306 
5307 /*
5308  * Create a sequential AIJ matrix based on row indices: a whole row is extracted once a row index is matched.
5309  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5310  * on a global size.
5311  * */
5312 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5313 {
5314   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5315   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5316   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
5317   PetscSFNode              *iremote,*oiremote;
5318   const PetscInt           *lrowindices;
5319   PetscErrorCode           ierr;
5320   PetscSF                  sf,osf;
5321   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5322   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5323   MPI_Comm                 comm;
5324   ISLocalToGlobalMapping   mapping;
5325 
5326   PetscFunctionBegin;
5327   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5328   /* plocalsize is the number of roots
5329    * nrows is the number of leaves
5330    * */
5331   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5332   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5333   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5334   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5335   for (i=0;i<nrows;i++) {
5336     /* Find a remote index and an owner for a row
5337      * The row could be local or remote
5338      * */
5339     owner = 0;
5340     lidx  = 0;
5341     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5342     iremote[i].index = lidx;
5343     iremote[i].rank  = owner;
5344   }
5345   /* Create SF to communicate how many nonzero columns for each row */
5346   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5347  * SF will figure out the number of nonzero columns for each row, and their
5348    * offsets
5349    * */
5350   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5351   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5352   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5353 
5354   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5355   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5356   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5357   roffsets[0] = 0;
5358   roffsets[1] = 0;
5359   for (i=0;i<plocalsize;i++) {
5360     /* diag */
5361     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5362     /* off diag */
5363     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5364     /* compute offsets so that we know the relative location of each row */
5365     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5366     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5367   }
5368   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5369   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5370   /* 'r' means root, and 'l' means leaf */
5371   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5372   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5373   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5374   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5375   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5376   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5377   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5378   dntotalcols = 0;
5379   ontotalcols = 0;
5380   ncol = 0;
5381   for (i=0;i<nrows;i++) {
5382     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5383     ncol = PetscMax(pnnz[i],ncol);
5384     /* diag */
5385     dntotalcols += nlcols[i*2+0];
5386     /* off diag */
5387     ontotalcols += nlcols[i*2+1];
5388   }
5389   /* We do not need to determine the exact number of columns
5390    * since all the calculations are done by going through the raw data
5391    * */
5392   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5393   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5394   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5395   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5396   /* diag */
5397   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5398   /* off diag */
5399   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5400   /* diag */
5401   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5402   /* off diag */
5403   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5404   dntotalcols = 0;
5405   ontotalcols = 0;
5406   ntotalcols  = 0;
5407   for (i=0;i<nrows;i++) {
5408     owner = 0;
5409     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5410     /* Set iremote for diag matrix */
5411     for (j=0;j<nlcols[i*2+0];j++) {
5412       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5413       iremote[dntotalcols].rank    = owner;
5414       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5415       ilocal[dntotalcols++]        = ntotalcols++;
5416     }
5417     /* off diag */
5418     for (j=0;j<nlcols[i*2+1];j++) {
5419       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5420       oiremote[ontotalcols].rank    = owner;
5421       oilocal[ontotalcols++]        = ntotalcols++;
5422     }
5423   }
5424   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5425   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5426   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5427   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5428   /* P serves as roots and P_oth serves as leaves
5429    * Diagonal matrix
5430    * */
5431   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5432   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5433   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5434 
5435   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5436   /* Off diag */
5437   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5438   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5439   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5440   /* We operate on the matrix internal data to save memory */
5441   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5442   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5443   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5444   /* Convert to global indices for diag matrix */
5445   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5446   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5447   /* We want P_oth to store global indices */
5448   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5449   /* Use memory scalable approach */
5450   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5451   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5452   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5453   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5454   /* Convert back to local indices */
5455   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5456   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5457   nout = 0;
5458   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5459   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5460   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5461   /* Exchange values */
5462   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5463   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5464   /* Stop PETSc from shrinking memory */
5465   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5466   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5467   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5468   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5469   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5470   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5471   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5472   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5473   PetscFunctionReturn(0);
5474 }
5475 
5476 /*
5477  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A
5478  * This supports MPIAIJ and MAIJ
5479  * */
5480 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5481 {
5482   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5483   Mat_SeqAIJ            *p_oth;
5484   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5485   IS                    rows,map;
5486   PetscHMapI            hamp;
5487   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5488   MPI_Comm              comm;
5489   PetscSF               sf,osf;
5490   PetscBool             has;
5491   PetscErrorCode        ierr;
5492 
5493   PetscFunctionBegin;
5494   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5495   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5496   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5497    *  and then create a submatrix (that often is an overlapping matrix)
5498    * */
5499   if (reuse==MAT_INITIAL_MATRIX) {
5500     /* Use a hash table to figure out unique keys */
5501     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5502     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5503     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5504     count = 0;
5505     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5506     for (i=0;i<a->B->cmap->n;i++) {
5507       key  = a->garray[i]/dof;
5508       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5509       if (!has) {
5510         mapping[i] = count;
5511         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5512       } else {
5513         /* The current 'i' maps to the same key as the previous step */
5514         mapping[i] = count-1;
5515       }
5516     }
5517     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5518     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5519     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5520     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5521     off = 0;
5522     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5523     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5524     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5525     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5526     /* In case the matrix was already created but the user wants to recreate it */
5527     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5528     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5529     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5530     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5531   } else if (reuse==MAT_REUSE_MATRIX) {
5532     /* If the matrix was already created, we simply update the values using the SF objects
5533      * that were attached to the matrix earlier.
5534      *  */
5535     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5536     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5537     if (!sf || !osf) {
5538       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5539     }
5540     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5541     /* Update values in place */
5542     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5543     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5544     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5545     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5546   } else {
5547     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5548   }
5549   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 /*@C
5554     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5555 
5556     Collective on Mat
5557 
5558    Input Parameters:
5559 +    A,B - the matrices in mpiaij format
5560 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5561 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5562 
5563    Output Parameter:
5564 +    rowb, colb - index sets of rows and columns of B to extract
5565 -    B_seq - the sequential matrix generated
5566 
5567     Level: developer
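
   Example usage (an illustrative sketch, not taken from an existing PETSc example; A and B are assumed to be
   assembled MATMPIAIJ matrices with compatible layouts and ierr a PetscErrorCode):

      Mat B_seq;
      IS  rowb = NULL,colb = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);

   The index sets returned from the MAT_INITIAL_MATRIX call must be passed back unchanged for MAT_REUSE_MATRIX.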
5568 
5569 @*/
5570 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5571 {
5572   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5573   PetscErrorCode ierr;
5574   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5575   IS             isrowb,iscolb;
5576   Mat            *bseq=NULL;
5577 
5578   PetscFunctionBegin;
5579   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5580     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5581   }
5582   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5583 
5584   if (scall == MAT_INITIAL_MATRIX) {
5585     start = A->cmap->rstart;
5586     cmap  = a->garray;
5587     nzA   = a->A->cmap->n;
5588     nzB   = a->B->cmap->n;
5589     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5590     ncols = 0;
5591     for (i=0; i<nzB; i++) {  /* row < local row index */
5592       if (cmap[i] < start) idx[ncols++] = cmap[i];
5593       else break;
5594     }
5595     imark = i;
5596     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5597     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5598     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5599     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5600   } else {
5601     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5602     isrowb  = *rowb; iscolb = *colb;
5603     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5604     bseq[0] = *B_seq;
5605   }
5606   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5607   *B_seq = bseq[0];
5608   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5609   if (!rowb) {
5610     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5611   } else {
5612     *rowb = isrowb;
5613   }
5614   if (!colb) {
5615     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5616   } else {
5617     *colb = iscolb;
5618   }
5619   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
5622 
5623 /*
5624     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5625     of the OFF-DIAGONAL portion of local A
5626 
5627     Collective on Mat
5628 
5629    Input Parameters:
5630 +    A,B - the matrices in mpiaij format
5631 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5632 
5633    Output Parameter:
5634 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5635 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5636 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5637 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5638 
5639     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5640      for this matrix. This is not desirable.
5641 
5642     Level: developer
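
    Typical usage (an illustrative sketch, not taken from an existing caller; A and B are assumed to be
    assembled MATMPIAIJ matrices with compatible layouts and ierr a PetscErrorCode):

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa = NULL;
       Mat       B_oth;
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       ierr = MatDestroy(&B_oth);CHKERRQ(ierr);

    The arrays saved from the MAT_INITIAL_MATRIX call carry the communication layout needed to refresh the
    values of B_oth with MAT_REUSE_MATRIX.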
5643 
5644 */
5645 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5646 {
5647   PetscErrorCode         ierr;
5648   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5649   Mat_SeqAIJ             *b_oth;
5650   VecScatter             ctx;
5651   MPI_Comm               comm;
5652   const PetscMPIInt      *rprocs,*sprocs;
5653   const PetscInt         *srow,*rstarts,*sstarts;
5654   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5655   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5656   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5657   MPI_Request            *rwaits = NULL,*swaits = NULL;
5658   MPI_Status             rstatus;
5659   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5660 
5661   PetscFunctionBegin;
5662   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5664 
5665   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5666     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5667   }
5668   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5670 
5671   if (size == 1) {
5672     startsj_s = NULL;
5673     bufa_ptr  = NULL;
5674     *B_oth    = NULL;
5675     PetscFunctionReturn(0);
5676   }
5677 
5678   ctx = a->Mvctx;
5679   tag = ((PetscObject)ctx)->tag;
5680 
5681   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5682   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5683   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5684   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5685   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5686   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5687   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5688 
5689   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5690   if (scall == MAT_INITIAL_MATRIX) {
5691     /* i-array */
5692     /*---------*/
5693     /*  post receives */
5694     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5695     for (i=0; i<nrecvs; i++) {
5696       rowlen = rvalues + rstarts[i]*rbs;
5697       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5698       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5699     }
5700 
5701     /* pack the outgoing message */
5702     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5703 
5704     sstartsj[0] = 0;
5705     rstartsj[0] = 0;
5706     len         = 0; /* total length of j or a array to be sent */
5707     if (nsends) {
5708       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5709       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5710     }
5711     for (i=0; i<nsends; i++) {
5712       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5713       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5714       for (j=0; j<nrows; j++) {
5715         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5716         for (l=0; l<sbs; l++) {
5717           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5718 
5719           rowlen[j*sbs+l] = ncols;
5720 
5721           len += ncols;
5722           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5723         }
5724         k++;
5725       }
5726       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5727 
5728       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5729     }
5730     /* recvs and sends of i-array are completed */
5731     i = nrecvs;
5732     while (i--) {
5733       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5734     }
5735     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5736     ierr = PetscFree(svalues);CHKERRQ(ierr);
5737 
5738     /* allocate buffers for sending j and a arrays */
5739     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5740     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5741 
5742     /* create i-array of B_oth */
5743     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5744 
5745     b_othi[0] = 0;
5746     len       = 0; /* total length of j or a array to be received */
5747     k         = 0;
5748     for (i=0; i<nrecvs; i++) {
5749       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5750       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5751       for (j=0; j<nrows; j++) {
5752         b_othi[k+1] = b_othi[k] + rowlen[j];
5753         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5754         k++;
5755       }
5756       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5757     }
5758     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5759 
5760     /* allocate space for j and a arrays of B_oth */
5761     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5762     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5763 
5764     /* j-array */
5765     /*---------*/
5766     /*  post receives of j-array */
5767     for (i=0; i<nrecvs; i++) {
5768       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5769       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5770     }
5771 
5772     /* pack the outgoing message j-array */
5773     if (nsends) k = sstarts[0];
5774     for (i=0; i<nsends; i++) {
5775       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5776       bufJ  = bufj+sstartsj[i];
5777       for (j=0; j<nrows; j++) {
5778         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5779         for (ll=0; ll<sbs; ll++) {
5780           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5781           for (l=0; l<ncols; l++) {
5782             *bufJ++ = cols[l];
5783           }
5784           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5785         }
5786       }
5787       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5788     }
5789 
5790     /* recvs and sends of j-array are completed */
5791     i = nrecvs;
5792     while (i--) {
5793       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5794     }
5795     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5796   } else if (scall == MAT_REUSE_MATRIX) {
5797     sstartsj = *startsj_s;
5798     rstartsj = *startsj_r;
5799     bufa     = *bufa_ptr;
5800     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5801     b_otha   = b_oth->a;
5802   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5803 
5804   /* a-array */
5805   /*---------*/
5806   /*  post receives of a-array */
5807   for (i=0; i<nrecvs; i++) {
5808     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5809     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5810   }
5811 
5812   /* pack the outgoing message a-array */
5813   if (nsends) k = sstarts[0];
5814   for (i=0; i<nsends; i++) {
5815     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5816     bufA  = bufa+sstartsj[i];
5817     for (j=0; j<nrows; j++) {
5818       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5819       for (ll=0; ll<sbs; ll++) {
5820         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5821         for (l=0; l<ncols; l++) {
5822           *bufA++ = vals[l];
5823         }
5824         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5825       }
5826     }
5827     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5828   }
5829   /* recvs and sends of a-array are completed */
5830   i = nrecvs;
5831   while (i--) {
5832     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5833   }
5834   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5835   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5836 
5837   if (scall == MAT_INITIAL_MATRIX) {
5838     /* put together the new matrix */
5839     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5840 
5841     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5842     /* Since these are PETSc arrays, change flags to free them as necessary. */
5843     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5844     b_oth->free_a  = PETSC_TRUE;
5845     b_oth->free_ij = PETSC_TRUE;
5846     b_oth->nonew   = 0;
5847 
5848     ierr = PetscFree(bufj);CHKERRQ(ierr);
5849     if (!startsj_s || !bufa_ptr) {
5850       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5851       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5852     } else {
5853       *startsj_s = sstartsj;
5854       *startsj_r = rstartsj;
5855       *bufa_ptr  = bufa;
5856     }
5857   }
5858 
5859   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5860   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5861   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5862   PetscFunctionReturn(0);
5863 }
5864 
5865 /*@C
5866   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5867 
5868   Not Collective
5869 
5870   Input Parameter:
5871 . A - The matrix in mpiaij format
5872 
5873   Output Parameters:
5874 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5875 . colmap - A map from global column index to local index into lvec
5876 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5877 
5878   Level: developer
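
  Example usage (an illustrative sketch, not taken from an existing PETSc example; A is assumed to be an
  assembled MATMPIAIJ matrix and ierr a PetscErrorCode; the type of colmap depends on whether PETSc was
  configured with PETSC_USE_CTABLE):

     Vec        lvec;
     VecScatter mvctx;
  #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
  #else
     PetscInt   *colmap;
  #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);

  The returned objects are references to the internal data of A and must not be destroyed by the caller.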
5879 
5880 @*/
5881 #if defined(PETSC_USE_CTABLE)
5882 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5883 #else
5884 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5885 #endif
5886 {
5887   Mat_MPIAIJ *a;
5888 
5889   PetscFunctionBegin;
5890   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5891   PetscValidPointer(lvec, 2);
5892   PetscValidPointer(colmap, 3);
5893   PetscValidPointer(multScatter, 4);
5894   a = (Mat_MPIAIJ*) A->data;
5895   if (lvec) *lvec = a->lvec;
5896   if (colmap) *colmap = a->colmap;
5897   if (multScatter) *multScatter = a->Mvctx;
5898   PetscFunctionReturn(0);
5899 }
5900 
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5904 #if defined(PETSC_HAVE_MKL_SPARSE)
5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5906 #endif
5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5908 #if defined(PETSC_HAVE_ELEMENTAL)
5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5910 #endif
5911 #if defined(PETSC_HAVE_HYPRE)
5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5913 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5914 #endif
5915 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5917 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5918 
5919 /*
5920     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5921 
5922                n                       p                          p
5923         (              )       (              )         (                  )
5924       m (      A       )  *  n (       B      )   =   m (         C        )
5925         (              )       (              )         (                  )
5926 
5927 */
5928 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5929 {
5930   PetscErrorCode ierr;
5931   Mat            At,Bt,Ct;
5932 
5933   PetscFunctionBegin;
5934   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5935   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5936   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5937   ierr = MatDestroy(&At);CHKERRQ(ierr);
5938   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5939   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5940   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5941   PetscFunctionReturn(0);
5942 }
5943 
5944 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5945 {
5946   PetscErrorCode ierr;
5947   PetscInt       m=A->rmap->n,n=B->cmap->n;
5948   Mat            Cmat;
5949 
5950   PetscFunctionBegin;
5951   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5952   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5953   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5954   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5955   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5956   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5957   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5958   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5959 
5960   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5961 
5962   *C = Cmat;
5963   PetscFunctionReturn(0);
5964 }
5965 
5966 /* ----------------------------------------------------------------*/
5967 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5968 {
5969   PetscErrorCode ierr;
5970 
5971   PetscFunctionBegin;
5972   if (scall == MAT_INITIAL_MATRIX) {
5973     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5974     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5975     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5976   }
5977   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5978   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5979   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5980   PetscFunctionReturn(0);
5981 }
5982 
5983 /*MC
5984    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5985 
5986    Options Database Keys:
5987 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5988 
5989    Level: beginner
5990 
5991    Notes:
5992     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5993     in this case the values associated with the rows and columns one passes in are set to zero
5994     in the matrix.
5995 
5996     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type; in this case no
5997     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
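
    Example usage (an illustrative sketch, not taken from an existing PETSc example; comm, m, n, M, N, d_nz,
    and o_nz are assumed to be set by the caller and ierr is a PetscErrorCode):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);

    Alternatively, call MatSetFromOptions() after MatSetSizes() and select the type with -mat_type mpiaij.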
5998 
5999 .seealso: MatCreateAIJ()
6000 M*/
6001 
6002 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6003 {
6004   Mat_MPIAIJ     *b;
6005   PetscErrorCode ierr;
6006   PetscMPIInt    size;
6007 
6008   PetscFunctionBegin;
6009   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6010 
6011   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6012   B->data       = (void*)b;
6013   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6014   B->assembled  = PETSC_FALSE;
6015   B->insertmode = NOT_SET_VALUES;
6016   b->size       = size;
6017 
6018   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6019 
6020   /* build cache for off array entries formed */
6021   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6022 
6023   b->donotstash  = PETSC_FALSE;
6024   b->colmap      = 0;
6025   b->garray      = 0;
6026   b->roworiented = PETSC_TRUE;
6027 
6028   /* stuff used for matrix vector multiply */
6029   b->lvec  = NULL;
6030   b->Mvctx = NULL;
6031 
6032   /* stuff for MatGetRow() */
6033   b->rowindices   = 0;
6034   b->rowvalues    = 0;
6035   b->getrowactive = PETSC_FALSE;
6036 
6037   /* flexible pointer used in CUSP/CUSPARSE classes */
6038   b->spptr = NULL;
6039 
6040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6050 #if defined(PETSC_HAVE_MKL_SPARSE)
6051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6052 #endif
6053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6055 #if defined(PETSC_HAVE_ELEMENTAL)
6056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6057 #endif
6058 #if defined(PETSC_HAVE_HYPRE)
6059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6060 #endif
6061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6066 #if defined(PETSC_HAVE_HYPRE)
6067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6068 #endif
6069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6070   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6071   PetscFunctionReturn(0);
6072 }
6073 
6074 /*@C
6075      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6076          and "off-diagonal" part of the matrix in CSR format.
6077 
6078    Collective
6079 
6080    Input Parameters:
6081 +  comm - MPI communicator
6082 .  m - number of local rows (Cannot be PETSC_DECIDE)
6083 .  n - number of local columns. This value should be the same as the local size used in creating the
6084        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6085        calculated if N is given). For square matrices n is almost always m.
6086 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6087 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6088 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6089 .   j - column indices
6090 .   a - matrix values
6091 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6092 .   oj - column indices
6093 -   oa - matrix values
6094 
6095    Output Parameter:
6096 .   mat - the matrix
6097 
6098    Level: advanced
6099 
6100    Notes:
6101        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6102        must free the arrays once the matrix has been destroyed and not before.
6103 
6104        The i and j indices are 0 based
6105 
6106        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6107 
6108        This sets local rows and cannot be used to set off-processor values.
6109 
6110        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6111        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6112        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6113        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6114        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6115        communication if it is known that only local entries will be set.
6116 
6117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6118           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6119 @*/
6120 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6121 {
6122   PetscErrorCode ierr;
6123   Mat_MPIAIJ     *maij;
6124 
6125   PetscFunctionBegin;
6126   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6127   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6128   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6129   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6130   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6131   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6132   maij = (Mat_MPIAIJ*) (*mat)->data;
6133 
6134   (*mat)->preallocated = PETSC_TRUE;
6135 
6136   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6137   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6138 
6139   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6140   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6141 
6142   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6143   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6144   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6145   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6146 
6147   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6148   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6149   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6150   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6151   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6152   PetscFunctionReturn(0);
6153 }
6154 
6155 /*
6156     Special version for direct calls from Fortran
6157 */
6158 #include <petsc/private/fortranimpl.h>
6159 
6160 /* Change these macros so they can be used in a void function */
6161 #undef CHKERRQ
6162 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6163 #undef SETERRQ2
6164 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6165 #undef SETERRQ3
6166 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6167 #undef SETERRQ
6168 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6169 
6170 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6171 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6172 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6173 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6174 #else
6175 #endif
6176 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6177 {
6178   Mat            mat  = *mmat;
6179   PetscInt       m    = *mm, n = *mn;
6180   InsertMode     addv = *maddv;
6181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6182   PetscScalar    value;
6183   PetscErrorCode ierr;
6184 
6185   MatCheckPreallocated(mat,1);
6186   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6187 
6188 #if defined(PETSC_USE_DEBUG)
6189   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6190 #endif
6191   {
6192     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6193     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6194     PetscBool roworiented = aij->roworiented;
6195 
6196     /* Some Variables required in the macro */
6197     Mat        A                 = aij->A;
6198     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6199     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6200     MatScalar  *aa               = a->a;
6201     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6202     Mat        B                 = aij->B;
6203     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6204     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6205     MatScalar  *ba               = b->a;
6206 
6207     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6208     PetscInt  nonew = a->nonew;
6209     MatScalar *ap1,*ap2;
6210 
6211     PetscFunctionBegin;
6212     for (i=0; i<m; i++) {
6213       if (im[i] < 0) continue;
6214 #if defined(PETSC_USE_DEBUG)
6215       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6216 #endif
6217       if (im[i] >= rstart && im[i] < rend) {
6218         row      = im[i] - rstart;
6219         lastcol1 = -1;
6220         rp1      = aj + ai[row];
6221         ap1      = aa + ai[row];
6222         rmax1    = aimax[row];
6223         nrow1    = ailen[row];
6224         low1     = 0;
6225         high1    = nrow1;
6226         lastcol2 = -1;
6227         rp2      = bj + bi[row];
6228         ap2      = ba + bi[row];
6229         rmax2    = bimax[row];
6230         nrow2    = bilen[row];
6231         low2     = 0;
6232         high2    = nrow2;
6233 
6234         for (j=0; j<n; j++) {
6235           if (roworiented) value = v[i*n+j];
6236           else value = v[i+j*m];
6237           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6238           if (in[j] >= cstart && in[j] < cend) {
6239             col = in[j] - cstart;
6240             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6241           } else if (in[j] < 0) continue;
6242 #if defined(PETSC_USE_DEBUG)
6243           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6244           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6245 #endif
6246           else {
6247             if (mat->was_assembled) {
6248               if (!aij->colmap) {
6249                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6250               }
6251 #if defined(PETSC_USE_CTABLE)
6252               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6253               col--;
6254 #else
6255               col = aij->colmap[in[j]] - 1;
6256 #endif
6257               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6258                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6259                 col  =  in[j];
6260                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6261                 B     = aij->B;
6262                 b     = (Mat_SeqAIJ*)B->data;
6263                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6264                 rp2   = bj + bi[row];
6265                 ap2   = ba + bi[row];
6266                 rmax2 = bimax[row];
6267                 nrow2 = bilen[row];
6268                 low2  = 0;
6269                 high2 = nrow2;
6270                 bm    = aij->B->rmap->n;
6271                 ba    = b->a;
6272               }
6273             } else col = in[j];
6274             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6275           }
6276         }
6277       } else if (!aij->donotstash) {
6278         if (roworiented) {
6279           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6280         } else {
6281           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6282         }
6283       }
6284     }
6285   }
6286   PetscFunctionReturnVoid();
6287 }
6288