xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b0bdc8384fc2b31096d969f3a75fbcfdfbe83867)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
/*
   Computes the 1-, 2-, or infinity-norm of every global column of A.

   Each process accumulates contributions from its diagonal (A) and
   off-diagonal (B) blocks into a dense work array of global length n and
   the results are combined with a single Allreduce (MAX for the infinity
   norm, SUM otherwise).  NOTE(review): the O(n) work array per process is
   not memory-scalable for very wide matrices.
*/
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;  /* garray maps B's local column indices to global columns */
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a*a| == |a|^2 per column; square roots are taken at the end */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  /* combine the per-process partial results across the communicator */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN

    Input:
      comm  - communicator the distributed matrix will live on
      gmat  - the full sequential matrix; significant only on rank 0
      m     - number of rows this process is to own
      reuse - MAT_INITIAL_MATRIX builds the parallel matrix (structure and
              values); any other value assumes *inmat already has the correct
              nonzero structure and only moves the numerical values over
    Output:
      inmat - the distributed MPIAIJ matrix
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 holds the global matrix, so only it can verify the type */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* block sizes are only known on rank 0; broadcast them to everyone */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather everyone's local row count, then turn counts into ownership offsets */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of the diagonal block in row i */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 works directly out of gmat's own arrays */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    /* insert the values row by row; cnt walks the packed value/index arrays */
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* non-root ranks allocated receive buffers; rank 0 used gmat's arrays */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash ld so a later reuse call can split the value stream into A and B parts */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      /* row 0: entries left of the diagonal block go to B, then the diagonal block to A */
      i  = 0;
      nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of B's row i-1 plus the left part of B's row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
419 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array but it is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;  /* number of local off-diagonal columns */

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: stores global+1 -> local+1 so that 0 can mean "absent" */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense variant: colmap[global] == local+1, and 0 means the column is absent */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
447 
/*
   Inserts (or adds, per addv) a single value at local (row,col) of the
   diagonal block A of an MPIAIJ matrix.  Implemented as a macro rather than
   a function for speed: it expands inside MatSetValues_MPIAIJ() and uses
   the local variables prepared there (rp1/ap1 = column/value arrays of the
   current row, nrow1 = used length, low1/high1 = binary-search window,
   lastcol1 = previous column for monotone-insert speedup, aimax/ailen/
   nonew/ignorezeroentries from the SeqAIJ block).  orow/ocol are the
   original global indices, used only in error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
484 
/*
   Off-diagonal-block (B) twin of MatSetValues_SeqAIJ_A_Private(): inserts
   or adds a single value at (row,col) of B.  Expands inside
   MatSetValues_MPIAIJ() and relies on the corresponding "2"-suffixed local
   variables set up there (rp2/ap2, nrow2, low2/high2, lastcol2, bimax,
   bilen, nonew, ignorezeroentries).  orow/ocol are the original global
   indices, used only in error messages.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
520 
/*
   Replaces all stored values in (global) row `row` with the values in v,
   given in the order the entries appear across the full row: off-diagonal
   entries left of the diagonal block, then the diagonal block, then
   off-diagonal entries to the right.

   NOTE(review): assumes the row is locally owned; as the comment below
   says, this only works for square matrices (the column split point is
   computed from the row ownership start).
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert to local row number */
  /* l = number of B entries whose global column is left of the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
546 
/*
   Sets values into an MPIAIJ matrix.  Locally owned entries are inserted
   directly into the diagonal (A) or off-diagonal (B) sequential block via
   the MatSetValues_SeqAIJ_{A,B}_Private() macros above; entries destined
   for rows owned by other processes are placed in the stash and
   communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is locally owned: set up the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* optionally skip zero additions, except on the diagonal */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;   /* negative column indices are ignored */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            /* B uses compacted column numbering after assembly; translate
               the global column via the colmap (created on demand) */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;   /* colmap stores local+1; col < 0 means the column is not present */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal column: disassemble B back to global
                 column numbering so the entry can be inserted */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              /* column not present and insertions are restricted */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];   /* before first assembly B uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
655 
656 /*
657     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
660 */
661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
662 {
663   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
664   Mat            A           = aij->A; /* diagonal part of the matrix */
665   Mat            B           = aij->B; /* offdiagonal part of the matrix */
666   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
667   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
668   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
669   PetscInt       *ailen      = a->ilen,*aj = a->j;
670   PetscInt       *bilen      = b->ilen,*bj = b->j;
671   PetscInt       am          = aij->A->rmap->n,j;
672   PetscInt       diag_so_far = 0,dnz;
673   PetscInt       offd_so_far = 0,onz;
674 
675   PetscFunctionBegin;
676   /* Iterate over all rows of the matrix */
677   for (j=0; j<am; j++) {
678     dnz = onz = 0;
679     /*  Iterate over all non-zero columns of the current row */
680     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
681       /* If column is in the diagonal */
682       if (mat_j[col] >= cstart && mat_j[col] < cend) {
683         aj[diag_so_far++] = mat_j[col] - cstart;
684         dnz++;
685       } else { /* off-diagonal entries */
686         bj[offd_so_far++] = mat_j[col];
687         onz++;
688       }
689     }
690     ailen[j] = dnz;
691     bilen[j] = onz;
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 /*
697     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
698     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
699     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
700     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
701     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
702 */
703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
704 {
705   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
706   Mat            A      = aij->A; /* diagonal part of the matrix */
707   Mat            B      = aij->B; /* offdiagonal part of the matrix */
708   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
709   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
710   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
711   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
712   PetscInt       *ailen = a->ilen,*aj = a->j;
713   PetscInt       *bilen = b->ilen,*bj = b->j;
714   PetscInt       am     = aij->A->rmap->n,j;
715   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
716   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
717   PetscScalar    *aa = a->a,*ba = b->a;
718 
719   PetscFunctionBegin;
720   /* Iterate over all rows of the matrix */
721   for (j=0; j<am; j++) {
722     dnz_row = onz_row = 0;
723     rowstart_offd = full_offd_i[j];
724     rowstart_diag = full_diag_i[j];
725     /*  Iterate over all non-zero columns of the current row */
726     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
727       /* If column is in the diagonal */
728       if (mat_j[col] >= cstart && mat_j[col] < cend) {
729         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
730         aa[rowstart_diag+dnz_row] = mat_a[col];
731         dnz_row++;
732       } else { /* off-diagonal entries */
733         bj[rowstart_offd+onz_row] = mat_j[col];
734         ba[rowstart_offd+onz_row] = mat_a[col];
735         onz_row++;
736       }
737     }
738     ailen[j] = dnz_row;
739     bilen[j] = onz_row;
740   }
741   PetscFunctionReturn(0);
742 }
743 
/*
   Retrieves entries of an MPIAIJ matrix into v (row-major, v[i*n+j] = A(idxm[i],idxn[j])).
   Only rows owned by this process are supported; negative row/column indices are skipped
   so callers may pass -1 to mean "ignore".  Columns outside the local diagonal range are
   looked up in the off-diagonal block through the compact column map; entries not stored
   there are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored; SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: convert to a local row index */
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are ignored; SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: build the global->compact column map lazily */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          /* the map stores compact-index+1 so that a lookup of 0 means "absent" */
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (absent) or a stale map entry means the value is an implicit zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
783 
784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
785 
786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
787 {
788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
789   PetscErrorCode ierr;
790   PetscInt       nstash,reallocs;
791 
792   PetscFunctionBegin;
793   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
794 
795   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
796   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
797   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
/*
   Completes assembly of an MPIAIJ matrix: drains the off-process stash into the local
   blocks, assembles the diagonal and off-diagonal sequential matrices, handles
   disassembly consistency across ranks, sets up the ghost-value scatter on first
   final assembly, and reduces the global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries message by message until the stash is drained */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of booleans: true only if every rank was previously assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* first final assembly: build the column map and ghost-value scatter */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop any cached MatGetRow workspace; it is rebuilt on demand */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
883 
884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
885 {
886   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
891   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
/*
   Zeroes the given global rows, optionally setting 'diag' on the diagonal of each
   zeroed row, and (when x and b are both given) fixing the right-hand side so the
   eliminated equations keep the solution values in x.  Rows may be specified by any
   process; they are mapped to their owners first.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember the nonzero states so a pattern change can be detected afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry always lives in the diagonal block */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the 'nonew' flags; they are temporarily cleared to permit new entries */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows past the last global column have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the saved insertion-mode flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
969 
/*
   Zeroes the given global rows AND the corresponding columns, putting 'diag' on the
   diagonal; when x and b are given, b is adjusted so the eliminated unknowns keep the
   values in x.  Rows may be named by any process: a PetscSF reduction routes them to
   their owners, and a scattered mask vector identifies which ghost columns to zero in
   the off-diagonal block.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;  /* local row count; NOTE: reused below as a per-row nonzero count */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* mark the zeroed rows with 1 and scatter the marks to the ghost (column) side */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x over so eliminated columns can be moved to b */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* eliminated column: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1087 
1088 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       nt;
1093   VecScatter     Mvctx = a->Mvctx;
1094 
1095   PetscFunctionBegin;
1096   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1097   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1098 
1099   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1100   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1101   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1102   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1107 {
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   VecScatter     Mvctx = a->Mvctx;
1121 
1122   PetscFunctionBegin;
1123   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1124   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1125   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1126   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   /* do nondiagonal part */
1138   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1139   /* do local part */
1140   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141   /* add partial results together */
1142   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
/*
   Tests whether Bmat equals the transpose of Amat within tol.

   A cheap local test on the diagonal blocks is reduced over all ranks first; only if
   it passes everywhere are the off-diagonal parts compared, which requires gathering
   the non-owned rows/columns via MatCreateSubMatrices.

   NOTE(review): 'notme' is allocated with N (global columns) entries but the second
   fill loop runs over i=last..M (global rows); this only matches when M == N —
   presumably callers pass square matrices (e.g. MatIsSymmetric) — verify before
   reusing for rectangular cases.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* 'notme' holds all global indices outside this rank's ownership range */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me) */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1188 
1189 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1190 {
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1199 {
1200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   /* do nondiagonal part */
1205   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* add partial results together */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 /*
1215   This only works correctly for square matrices where the subblock A->A is the
1216    diagonal block
1217 */
1218 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1219 {
1220   PetscErrorCode ierr;
1221   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1222 
1223   PetscFunctionBegin;
1224   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1225   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1226   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1231 {
1232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1237   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1242 {
1243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247 #if defined(PETSC_USE_LOG)
1248   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1249 #endif
1250   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1251   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1252   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_CTABLE)
1255   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1256 #else
1257   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1258 #endif
1259   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1261   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1262   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1277 #if defined(PETSC_HAVE_ELEMENTAL)
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1279 #endif
1280 #if defined(PETSC_HAVE_HYPRE)
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1283 #endif
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode ierr;
1295   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1296   int            fd;
1297   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1299   PetscScalar    *column_values;
1300   PetscInt       message_count,flowcontrolcount;
1301   FILE           *file;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308   if (!rank) {
1309     header[0] = MAT_FILE_CLASSID;
1310     header[1] = mat->rmap->N;
1311     header[2] = mat->cmap->N;
1312 
1313     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     /* get largest number of rows any processor has */
1316     rlen  = mat->rmap->n;
1317     range = mat->rmap->range;
1318     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1327 
1328   /* store the row lengths to the file */
1329   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1330   if (!rank) {
1331     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       rlen = range[i+1] - range[i];
1335       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     }
1338     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1339   } else {
1340     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1341     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1343   }
1344   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1345 
1346   /* load up the local column indices */
1347   nzmax = nz; /* th processor needs space a largest processor needs */
1348   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1349   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1350   cnt   = 0;
1351   for (i=0; i<mat->rmap->n; i++) {
1352     for (j=B->i[i]; j<B->i[i+1]; j++) {
1353       if ((col = garray[B->j[j]]) > cstart) break;
1354       column_indices[cnt++] = col;
1355     }
1356     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1357     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1358   }
1359   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1360 
1361   /* store the column indices to the file */
1362   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1363   if (!rank) {
1364     MPI_Status status;
1365     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1366     for (i=1; i<size; i++) {
1367       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1368       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1369       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1370       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1371       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     }
1373     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1374   } else {
1375     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1376     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1379   }
1380   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1381 
1382   /* load up the local column values */
1383   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1384   cnt  = 0;
1385   for (i=0; i<mat->rmap->n; i++) {
1386     for (j=B->i[i]; j<B->i[i+1]; j++) {
1387       if (garray[B->j[j]] > cstart) break;
1388       column_values[cnt++] = B->a[j];
1389     }
1390     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1391     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1392   }
1393   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1394 
1395   /* store the column values to the file */
1396   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1397   if (!rank) {
1398     MPI_Status status;
1399     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1400     for (i=1; i<size; i++) {
1401       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1402       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1403       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1404       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1405       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     }
1407     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1410     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1411     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1412     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1413   }
1414   ierr = PetscFree(column_values);CHKERRQ(ierr);
1415 
1416   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1417   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Views an MPIAIJ matrix with an ASCII,
   draw, socket, or binary viewer.

   For the summary ASCII formats (load balance, info, info-detail, factor-info)
   only statistics are printed and the routine returns early.  For a full dump
   (plain ASCII, draw, socket) the entire matrix is gathered onto rank 0 via
   MatCreateSubMatrix() and viewed there as a SeqAIJ matrix; all ranks must
   participate because draw-viewer synchronization is collective.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): inodes is a PetscBool but is passed where a PetscInt**
         is expected; it is only tested as a non-NULL flag below, yet the cast
         stores a pointer into a PetscBool-sized object -- confirm this is safe
         on all platforms */
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      /* per-rank breakdown: diagonal block A, then off-diagonal block B */
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored AIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable -- iascii was already handled
       by the first branch of this if/else chain; confirm before removing */
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* only rank 0 requests any rows/columns; the other ranks pass empty ISs */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1550 
1551 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1552 {
1553   PetscErrorCode ierr;
1554   PetscBool      iascii,isdraw,issocket,isbinary;
1555 
1556   PetscFunctionBegin;
1557   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1559   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1560   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1561   if (iascii || isdraw || isbinary || issocket) {
1562     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1563   }
1564   PetscFunctionReturn(0);
1565 }
1566 
/*
   MatSOR_MPIAIJ - SOR/SSOR relaxation for MPIAIJ matrices.

   Only the "local" (processor-block Jacobi) variants and Eisenstat's trick
   are supported; true parallel SOR raises PETSC_ERR_SUP.  Each outer
   iteration scatters the current solution into the ghost vector lvec,
   folds the off-diagonal contribution into the right-hand side as
   bb1 = bb - B*x, then performs lits local sweeps on the diagonal block A.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* work vector bb - B*x; allocated only when needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is forwarded directly to the diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed when off-process values contribute: more than one outer
     iteration, a nonzero initial guess (~flag & ... tests the bit is NOT
     set), or the Eisenstat variant */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first iteration needs no scatter since x == 0 */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward half-sweep from a zero guess, then forward half-sweep below */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily cache the diagonal for the pointwise scaling fallback */
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any zero-pivot/error state detected in the local sweeps */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1666 
/*
   MatPermute_MPIAIJ - Forms B = P_r * A * P_c for permutations given by the
   index sets rowp and colp.

   Strategy: invert the row and column permutations with PetscSF reductions
   so each rank learns where its rows/columns land, broadcast the permuted
   destinations of the ghost columns (garray), count diagonal/off-diagonal
   nonzeros per destination row for exact preallocation, then insert the
   permuted values with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count the diagonal/off-diagonal nonzeros each source row will contribute
     at its destination, then push the counts to the destination ranks */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never assigned in this function, so this destroy
     is currently dead code -- confirm whether sequential-colp handling was lost */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1773 
1774 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1775 {
1776   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1781   if (ghosts) *ghosts = aij->garray;
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1786 {
1787   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1788   Mat            A    = mat->A,B = mat->B;
1789   PetscErrorCode ierr;
1790   PetscLogDouble isend[5],irecv[5];
1791 
1792   PetscFunctionBegin;
1793   info->block_size = 1.0;
1794   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1795 
1796   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1797   isend[3] = info->memory;  isend[4] = info->mallocs;
1798 
1799   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1800 
1801   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1802   isend[3] += info->memory;  isend[4] += info->mallocs;
1803   if (flag == MAT_LOCAL) {
1804     info->nz_used      = isend[0];
1805     info->nz_allocated = isend[1];
1806     info->nz_unneeded  = isend[2];
1807     info->memory       = isend[3];
1808     info->mallocs      = isend[4];
1809   } else if (flag == MAT_GLOBAL_MAX) {
1810     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1811 
1812     info->nz_used      = irecv[0];
1813     info->nz_allocated = irecv[1];
1814     info->nz_unneeded  = irecv[2];
1815     info->memory       = irecv[3];
1816     info->mallocs      = irecv[4];
1817   } else if (flag == MAT_GLOBAL_SUM) {
1818     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1819 
1820     info->nz_used      = irecv[0];
1821     info->nz_allocated = irecv[1];
1822     info->nz_unneeded  = irecv[2];
1823     info->memory       = irecv[3];
1824     info->mallocs      = irecv[4];
1825   }
1826   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1827   info->fill_ratio_needed = 0;
1828   info->factor_mallocs    = 0;
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1833 {
1834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1835   PetscErrorCode ierr;
1836 
1837   PetscFunctionBegin;
1838   switch (op) {
1839   case MAT_NEW_NONZERO_LOCATIONS:
1840   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1841   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1842   case MAT_KEEP_NONZERO_PATTERN:
1843   case MAT_NEW_NONZERO_LOCATION_ERR:
1844   case MAT_USE_INODES:
1845   case MAT_IGNORE_ZERO_ENTRIES:
1846     MatCheckPreallocated(A,1);
1847     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1848     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1849     break;
1850   case MAT_ROW_ORIENTED:
1851     MatCheckPreallocated(A,1);
1852     a->roworiented = flg;
1853 
1854     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1855     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1856     break;
1857   case MAT_NEW_DIAGONALS:
1858   case MAT_SORTED_FULL:
1859     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1860     break;
1861   case MAT_IGNORE_OFF_PROC_ENTRIES:
1862     a->donotstash = flg;
1863     break;
1864   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1865   case MAT_SPD:
1866   case MAT_SYMMETRIC:
1867   case MAT_STRUCTURALLY_SYMMETRIC:
1868   case MAT_HERMITIAN:
1869   case MAT_SYMMETRY_ETERNAL:
1870     break;
1871   case MAT_SUBMAT_SINGLEIS:
1872     A->submat_singleis = flg;
1873     break;
1874   case MAT_STRUCTURE_ONLY:
1875     /* The option is handled directly by MatSetOption() */
1876     break;
1877   default:
1878     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the matrix as global
   column indices and values, merging the diagonal (A) and off-diagonal (B)
   row pieces in increasing global column order.

   The merge relies on A's and B's rows each being sorted: B's columns below
   cstart come first, then all of A's columns (offset by cstart), then B's
   remaining columns; garray translates B's local columns to global indices.
   Work arrays sized to the longest local row are allocated lazily and kept
   on the matrix; MatRestoreRow_MPIAIJ() must be called afterwards.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL for whatever the caller did not request, so the sequential
     getrow routines skip that work */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;  /* index in B's row where columns jump past cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1960 
1961 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1962 {
1963   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1964 
1965   PetscFunctionBegin;
1966   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1967   aij->getrowactive = PETSC_FALSE;
1968   PetscFunctionReturn(0);
1969 }
1970 
/*
   MatNorm_MPIAIJ - Computes a matrix norm (Frobenius, 1-norm, or infinity
   norm) of an MPIAIJ matrix by combining contributions from the diagonal (A)
   and off-diagonal (B) sequential blocks and reducing across ranks.

   The 2-norm is not supported.  On one process the request is delegated to
   the sequential block directly.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, reduce, then take the root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| into a global-length column-sum array (tmp),
         reduce element-wise, then take the maximum entry */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps B's local column numbers to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are entirely local, so take the max of per-row |a_ij| sums
         and reduce with MPI_MAX */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
2037 
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place transpose, *matout == A) the result is
   created with exact preallocation computed via a PetscSF reduction of the
   off-diagonal column counts.  The diagonal block is transposed locally with
   MatTranspose(); the off-diagonal block is inserted column-by-column with
   MatSetValues(), exploiting the fact that a CSR row of B becomes a CSC
   column of the transpose.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* note the swapped local and global sizes/block sizes for the transpose */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    /* reuse an existing transpose; any new nonzero would indicate a bug */
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  /* translate B's local column indices to global via garray */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* row i of B becomes (global) column `row` of the transpose */
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's and discard B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2126 
/*
   MatDiagonalScale_MPIAIJ - computes mat = diag(ll)*mat*diag(rr); either
   vector may be NULL to skip that side of the scaling.

   The right (column) scaling of the off-diagonal block needs ghost values
   of rr, so the scatter is started first and completed only after all
   purely local scaling has been done, overlapping communication with
   computation.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;  /* diagonal and off-diagonal blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;                  /* s2,s3 = local row/column sizes of mat */

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left (row) scaling of the off-diagonal block uses only local data */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2157 
2158 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2159 {
2160   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2161   PetscErrorCode ierr;
2162 
2163   PetscFunctionBegin;
2164   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2169 {
2170   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2171   Mat            a,b,c,d;
2172   PetscBool      flg;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   a = matA->A; b = matA->B;
2177   c = matB->A; d = matB->B;
2178 
2179   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2180   if (flg) {
2181     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2182   }
2183   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2188 {
2189   PetscErrorCode ierr;
2190   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2191   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2192 
2193   PetscFunctionBegin;
2194   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2195   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2196     /* because of the column compression in the off-processor part of the matrix a->B,
2197        the number of columns in a->B and b->B may be different, hence we cannot call
2198        the MatCopy() directly on the two parts. If need be, we can provide a more
2199        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2200        then copying the submatrices */
2201     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2202   } else {
2203     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2204     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2205   }
2206   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
/* Default MatSetUp(): preallocates with PETSC_DEFAULT nonzero estimates. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2218 
2219 /*
2220    Computes the number of nonzeros per row needed for preallocation when X and Y
2221    have different nonzero structure.
2222 */
2223 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2224 {
2225   PetscInt       i,j,k,nzx,nzy;
2226 
2227   PetscFunctionBegin;
2228   /* Set the number of nonzeros in the new matrix */
2229   for (i=0; i<m; i++) {
2230     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2231     nzx = xi[i+1] - xi[i];
2232     nzy = yi[i+1] - yi[i];
2233     nnz[i] = 0;
2234     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2235       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2236       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2237       nnz[i]++;
2238     }
2239     for (; k<nzy; k++) nnz[i]++;
2240   }
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2245 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2246 {
2247   PetscErrorCode ierr;
2248   PetscInt       m = Y->rmap->N;
2249   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2250   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
/*
   MatAXPY_MPIAIJ - computes Y = a*X + Y.

   SAME_NONZERO_PATTERN: the value arrays of corresponding blocks line up
   one-to-one, so a raw BLAS axpy on the arrays suffices (assumes X and Y
   really do share the pattern in both the diagonal and off-diagonal blocks).
   SUBSET_NONZERO_PATTERN: generic MatSetValues-based path.
   Otherwise: build a fresh matrix preallocated for the union pattern, add
   into it, and replace Y's guts with it.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all stored nonzeros */
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* off-diagonal blocks, same treatment */
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
    /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
       will be updated */
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
      Y->offloadmask = PETSC_OFFLOAD_CPU;
    }
#endif
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    /* the blocks are sequential, so rmap->N is the local row count */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    /* per-row counts of the union pattern; the off-diagonal blocks need the
       garray local-to-global maps since their columns are compressed */
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* Y takes over B's internals; B itself is consumed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2305 
2306 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2307 
2308 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2309 {
2310 #if defined(PETSC_USE_COMPLEX)
2311   PetscErrorCode ierr;
2312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2316   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2317 #else
2318   PetscFunctionBegin;
2319 #endif
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2324 {
2325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2330   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2335 {
2336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2341   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
/*
   MatGetRowMaxAbs_MPIAIJ - for each local row, v gets the entry of largest
   absolute value and idx[] (if non-NULL) its global column index.

   Done blockwise: first the diagonal block (column indices shifted by
   cmap->rstart into global numbering), then the off-diagonal block into a
   temporary vector (column indices mapped through a->garray), keeping the
   larger of the two per row.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      /* only shift indices of rows whose diagonal-block max is nonzero */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* repeat for the off-diagonal block into a temporary vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      /* off-diagonal block wins this row: take its value and global column */
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);   /* NULL-safe when idx was NULL */
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2382 
2383 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2384 {
2385   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387   PetscInt       i,*idxb = 0;
2388   PetscScalar    *va,*vb;
2389   Vec            vtmp;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2394   if (idx) {
2395     for (i=0; i<A->cmap->n; i++) {
2396       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2397     }
2398   }
2399 
2400   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2401   if (idx) {
2402     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2403   }
2404   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2405   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2406 
2407   for (i=0; i<A->rmap->n; i++) {
2408     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2409       va[i] = vb[i];
2410       if (idx) idx[i] = a->garray[idxb[i]];
2411     }
2412   }
2413 
2414   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2416   ierr = PetscFree(idxb);CHKERRQ(ierr);
2417   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2422 {
2423   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2424   PetscInt       n      = A->rmap->n;
2425   PetscInt       cstart = A->cmap->rstart;
2426   PetscInt       *cmap  = mat->garray;
2427   PetscInt       *diagIdx, *offdiagIdx;
2428   Vec            diagV, offdiagV;
2429   PetscScalar    *a, *diagA, *offdiagA;
2430   PetscInt       r;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2436   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2437   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2438   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2440   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2441   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   for (r = 0; r < n; ++r) {
2443     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2444       a[r]   = diagA[r];
2445       idx[r] = cstart + diagIdx[r];
2446     } else {
2447       a[r]   = offdiagA[r];
2448       idx[r] = cmap[offdiagIdx[r]];
2449     }
2450   }
2451   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2454   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2456   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2461 {
2462   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2463   PetscInt       n      = A->rmap->n;
2464   PetscInt       cstart = A->cmap->rstart;
2465   PetscInt       *cmap  = mat->garray;
2466   PetscInt       *diagIdx, *offdiagIdx;
2467   Vec            diagV, offdiagV;
2468   PetscScalar    *a, *diagA, *offdiagA;
2469   PetscInt       r;
2470   PetscErrorCode ierr;
2471 
2472   PetscFunctionBegin;
2473   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2474   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2475   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2476   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2477   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2478   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2479   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2480   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2481   for (r = 0; r < n; ++r) {
2482     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2483       a[r]   = diagA[r];
2484       idx[r] = cstart + diagIdx[r];
2485     } else {
2486       a[r]   = offdiagA[r];
2487       idx[r] = cmap[offdiagIdx[r]];
2488     }
2489   }
2490   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2491   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2492   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2494   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2495   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2500 {
2501   PetscErrorCode ierr;
2502   Mat            *dummy;
2503 
2504   PetscFunctionBegin;
2505   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2506   *newmat = *dummy;
2507   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2512 {
2513   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2514   PetscErrorCode ierr;
2515 
2516   PetscFunctionBegin;
2517   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2518   A->factorerrortype = a->A->factorerrortype;
2519   PetscFunctionReturn(0);
2520 }
2521 
/*
   MatSetRandom_MPIAIJ - fills the matrix's (preallocated) nonzero locations
   with random values from rctx, then reassembles.

   For a not-yet-assembled matrix the off-diagonal block is filled while
   skipping the locally owned column range [cmap->rstart, cmap->rend) --
   those columns belong to the diagonal block A.
*/
static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* skip the diagonal column range while randomizing the off-diagonal block */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2539 
2540 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2541 {
2542   PetscFunctionBegin;
2543   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2544   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /*@
2549    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2550 
2551    Collective on Mat
2552 
2553    Input Parameters:
2554 +    A - the matrix
2555 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2556 
2557  Level: advanced
2558 
2559 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix provides one;
     silently does nothing for other matrix types */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2568 
2569 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2570 {
2571   PetscErrorCode       ierr;
2572   PetscBool            sc = PETSC_FALSE,flg;
2573 
2574   PetscFunctionBegin;
2575   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2576   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2577   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2578   if (flg) {
2579     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2580   }
2581   ierr = PetscOptionsTail();CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
/*
   MatShift_MPIAIJ - computes Y = Y + a*I.

   If Y was never preallocated, preallocate one nonzero per row (the
   diagonal).  If the diagonal block exists but currently holds no
   nonzeros, re-preallocate it for one entry per row so MatShift_Basic()
   can insert the diagonal, preserving the user's nonew flag across the
   re-preallocation.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;  /* diagonal block */

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* save and restore nonew around the re-preallocation */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2602 
2603 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2604 {
2605   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2606   PetscErrorCode ierr;
2607 
2608   PetscFunctionBegin;
2609   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2610   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2611   if (d) {
2612     PetscInt rstart;
2613     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2614     *d += rstart;
2615 
2616   }
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2621 {
2622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2623   PetscErrorCode ierr;
2624 
2625   PetscFunctionBegin;
2626   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 /* -------------------------------------------------------------------*/
/*
   Operation table for the MATMPIAIJ matrix type.  Each entry fills the
   corresponding slot of struct _MatOps (the numbered markers track the
   slot index); a 0 entry means the operation is not provided by this type.
   Positions must not be changed: they are fixed by the _MatOps layout.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       MatPinToCPU_MPIAIJ,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
2777 
2778 /* ----------------------------------------------------------------------------------------*/
2779 
2780 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2781 {
2782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2787   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2792 {
2793   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2798   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
/*
   MatMPIAIJSetPreallocation_MPIAIJ - sets up the two sequential blocks of
   the parallel matrix with the given per-row nonzero estimates.

   d_nz/d_nnz apply to the diagonal block (columns owned by this process),
   o_nz/o_nnz to the off-diagonal block.  Any previously built column map,
   ghost vector and scatter are destroyed since a new preallocation
   invalidates them.
*/
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* discard assembly-time artifacts that depend on the old structure */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* uniprocessor case: the off-diagonal block has no columns at all */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal block keeps its sizes across re-preallocations, so it is
     created only on the first call */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2846 
2847 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2848 {
2849   Mat_MPIAIJ     *b;
2850   PetscErrorCode ierr;
2851 
2852   PetscFunctionBegin;
2853   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2854   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2855   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2856   b = (Mat_MPIAIJ*)B->data;
2857 
2858 #if defined(PETSC_USE_CTABLE)
2859   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2860 #else
2861   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2862 #endif
2863   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2864   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2865   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2866 
2867   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2868   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2869   B->preallocated  = PETSC_TRUE;
2870   B->was_assembled = PETSC_FALSE;
2871   B->assembled = PETSC_FALSE;
2872   PetscFunctionReturn(0);
2873 }
2874 
/* Duplicates an MPIAIJ matrix: creates a new matrix with the same parallel
   layout, copies the bookkeeping fields, and duplicates the diagonal block A,
   the off-diagonal block B, and the scatter/work-vector infrastructure.
   Whether numerical values are copied is controlled by cpvalues (passed
   through to MatDuplicate() on A and B). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* the duplicate starts out preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;           /* MatGetRow() work space is rebuilt on demand */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* copy the global-to-local column map used during off-diagonal assembly */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* copy the array of global column indices of the off-diagonal block B */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the local work vector and the scatter used by MatMult() */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* the MPI-1 variant of the scatter exists only if it was built on the original */
  if (oldmat->Mvctx_mpi1) {
    ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2940 
2941 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2942 {
2943   PetscBool      isbinary, ishdf5;
2944   PetscErrorCode ierr;
2945 
2946   PetscFunctionBegin;
2947   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2948   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2949   /* force binary viewer to load .info file if it has not yet done so */
2950   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2953   if (isbinary) {
2954     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2955   } else if (ishdf5) {
2956 #if defined(PETSC_HAVE_HDF5)
2957     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2958 #else
2959     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2960 #endif
2961   } else {
2962     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2963   }
2964   PetscFunctionReturn(0);
2965 }
2966 
/* Loads an MPIAIJ matrix from a PETSc binary viewer.

   Process 0 reads the file header, the row lengths, the column indices and the
   numerical values, keeping its own slice and shipping every other process its
   portion with MPIULong_Send(); the receivers insert their rows locally and the
   matrix is assembled at the end.  The row distribution is taken from newMat if
   already set, otherwise computed block-size-aware from -matload_block_size. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only process 0 touches the file; header = [classid, M, N, nz] */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  /* broadcast global sizes (M, N, nz) read by process 0 */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  /* rowners becomes the prefix sum of the local row counts gathered from everyone */
  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    /* process 0 keeps its own row lengths, then reads and forwards each other rank's slice */
    ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* ourlens temporarily holds only the diagonal-block counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to the full per-row counts, needed for inserting rows below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3166 
3167 /* Not scalable because of ISAllGather() unless getting all columns. */
3168 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3169 {
3170   PetscErrorCode ierr;
3171   IS             iscol_local;
3172   PetscBool      isstride;
3173   PetscMPIInt    lisstride=0,gisstride;
3174 
3175   PetscFunctionBegin;
3176   /* check if we are grabbing all columns*/
3177   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3178 
3179   if (isstride) {
3180     PetscInt  start,len,mstart,mlen;
3181     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3182     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3183     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3184     if (mstart == start && mlen-mstart == len) lisstride = 1;
3185   }
3186 
3187   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3188   if (gisstride) {
3189     PetscInt N;
3190     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3191     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3192     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3193     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3194   } else {
3195     PetscInt cbs;
3196     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3197     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3198     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3199   }
3200 
3201   *isseq = iscol_local;
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 /*
3206  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3207  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3208 
3209  Input Parameters:
3210    mat - matrix
3211    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3212            i.e., mat->rstart <= isrow[i] < mat->rend
3213    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3214            i.e., mat->cstart <= iscol[i] < mat->cend
3215  Output Parameter:
3216    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3217    iscol_o - sequential column index set for retrieving mat->B
3218    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3219  */
3220 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3221 {
3222   PetscErrorCode ierr;
3223   Vec            x,cmap;
3224   const PetscInt *is_idx;
3225   PetscScalar    *xarray,*cmaparray;
3226   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3227   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3228   Mat            B=a->B;
3229   Vec            lvec=a->lvec,lcmap;
3230   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3231   MPI_Comm       comm;
3232   VecScatter     Mvctx=a->Mvctx;
3233 
3234   PetscFunctionBegin;
3235   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3236   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3237 
3238   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3239   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3240   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3241   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3242   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3243 
3244   /* Get start indices */
3245   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3246   isstart -= ncols;
3247   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3248 
3249   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3250   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3251   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3252   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3253   for (i=0; i<ncols; i++) {
3254     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3255     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3256     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3257   }
3258   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3259   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3260   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3261 
3262   /* Get iscol_d */
3263   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3264   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3265   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3266 
3267   /* Get isrow_d */
3268   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3269   rstart = mat->rmap->rstart;
3270   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3271   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3272   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3273   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3274 
3275   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3276   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3277   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3278 
3279   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3280   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3281   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3282 
3283   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3284 
3285   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3286   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3287 
3288   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3289   /* off-process column indices */
3290   count = 0;
3291   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3292   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3293 
3294   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3295   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3296   for (i=0; i<Bn; i++) {
3297     if (PetscRealPart(xarray[i]) > -1.0) {
3298       idx[count]     = i;                   /* local column index in off-diagonal part B */
3299       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3300       count++;
3301     }
3302   }
3303   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3304   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3305 
3306   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3307   /* cannot ensure iscol_o has same blocksize as iscol! */
3308 
3309   ierr = PetscFree(idx);CHKERRQ(ierr);
3310   *garray = cmap1;
3311 
3312   ierr = VecDestroy(&x);CHKERRQ(ierr);
3313   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3314   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3315   PetscFunctionReturn(0);
3316 }
3317 
3318 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3319 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3320 {
3321   PetscErrorCode ierr;
3322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3323   Mat            M = NULL;
3324   MPI_Comm       comm;
3325   IS             iscol_d,isrow_d,iscol_o;
3326   Mat            Asub = NULL,Bsub = NULL;
3327   PetscInt       n;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3331 
3332   if (call == MAT_REUSE_MATRIX) {
3333     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3334     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3335     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3336 
3337     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3338     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3339 
3340     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3341     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3342 
3343     /* Update diagonal and off-diagonal portions of submat */
3344     asub = (Mat_MPIAIJ*)(*submat)->data;
3345     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3346     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3347     if (n) {
3348       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3349     }
3350     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3352 
3353   } else { /* call == MAT_INITIAL_MATRIX) */
3354     const PetscInt *garray;
3355     PetscInt        BsubN;
3356 
3357     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3358     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3359 
3360     /* Create local submatrices Asub and Bsub */
3361     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3362     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3363 
3364     /* Create submatrix M */
3365     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3366 
3367     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3368     asub = (Mat_MPIAIJ*)M->data;
3369 
3370     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3371     n = asub->B->cmap->N;
3372     if (BsubN > n) {
3373       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3374       const PetscInt *idx;
3375       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3376       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3377 
3378       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3379       j = 0;
3380       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3381       for (i=0; i<n; i++) {
3382         if (j >= BsubN) break;
3383         while (subgarray[i] > garray[j]) j++;
3384 
3385         if (subgarray[i] == garray[j]) {
3386           idx_new[i] = idx[j++];
3387         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3388       }
3389       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3390 
3391       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3392       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3393 
3394     } else if (BsubN < n) {
3395       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3396     }
3397 
3398     ierr = PetscFree(garray);CHKERRQ(ierr);
3399     *submat = M;
3400 
3401     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3402     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3403     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3404 
3405     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3406     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3407 
3408     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3409     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3410   }
3411   PetscFunctionReturn(0);
3412 }
3413 
3414 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3415 {
3416   PetscErrorCode ierr;
3417   IS             iscol_local=NULL,isrow_d;
3418   PetscInt       csize;
3419   PetscInt       n,i,j,start,end;
3420   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3421   MPI_Comm       comm;
3422 
3423   PetscFunctionBegin;
3424   /* If isrow has same processor distribution as mat,
3425      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3426   if (call == MAT_REUSE_MATRIX) {
3427     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3428     if (isrow_d) {
3429       sameRowDist  = PETSC_TRUE;
3430       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3431     } else {
3432       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3433       if (iscol_local) {
3434         sameRowDist  = PETSC_TRUE;
3435         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3436       }
3437     }
3438   } else {
3439     /* Check if isrow has same processor distribution as mat */
3440     sameDist[0] = PETSC_FALSE;
3441     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3442     if (!n) {
3443       sameDist[0] = PETSC_TRUE;
3444     } else {
3445       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3446       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3447       if (i >= start && j < end) {
3448         sameDist[0] = PETSC_TRUE;
3449       }
3450     }
3451 
3452     /* Check if iscol has same processor distribution as mat */
3453     sameDist[1] = PETSC_FALSE;
3454     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3455     if (!n) {
3456       sameDist[1] = PETSC_TRUE;
3457     } else {
3458       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3459       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3460       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3461     }
3462 
3463     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3464     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3465     sameRowDist = tsameDist[0];
3466   }
3467 
3468   if (sameRowDist) {
3469     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3470       /* isrow and iscol have same processor distribution as mat */
3471       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3472       PetscFunctionReturn(0);
3473     } else { /* sameRowDist */
3474       /* isrow has same processor distribution as mat */
3475       if (call == MAT_INITIAL_MATRIX) {
3476         PetscBool sorted;
3477         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3478         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3479         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3480         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3481 
3482         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3483         if (sorted) {
3484           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3485           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3486           PetscFunctionReturn(0);
3487         }
3488       } else { /* call == MAT_REUSE_MATRIX */
3489         IS    iscol_sub;
3490         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3491         if (iscol_sub) {
3492           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3493           PetscFunctionReturn(0);
3494         }
3495       }
3496     }
3497   }
3498 
3499   /* General case: iscol -> iscol_local which has global size of iscol */
3500   if (call == MAT_REUSE_MATRIX) {
3501     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3502     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3503   } else {
3504     if (!iscol_local) {
3505       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3506     }
3507   }
3508 
3509   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3510   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3511 
3512   if (call == MAT_INITIAL_MATRIX) {
3513     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3514     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3515   }
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 /*@C
3520      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3521          and "off-diagonal" part of the matrix in CSR format.
3522 
3523    Collective
3524 
3525    Input Parameters:
3526 +  comm - MPI communicator
3527 .  A - "diagonal" portion of matrix
3528 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3529 -  garray - global index of B columns
3530 
3531    Output Parameter:
3532 .   mat - the matrix, with input A as its local diagonal matrix
3533    Level: advanced
3534 
3535    Notes:
3536        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3537        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3538 
3539 .seealso: MatCreateMPIAIJWithSplitArrays()
3540 @*/
3541 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3542 {
3543   PetscErrorCode ierr;
3544   Mat_MPIAIJ     *maij;
3545   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3546   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3547   PetscScalar    *oa=b->a;
3548   Mat            Bnew;
3549   PetscInt       m,n,N;
3550 
3551   PetscFunctionBegin;
3552   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3553   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3554   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3555   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3556   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3557   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3558 
3559   /* Get global columns of mat */
3560   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3561 
3562   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3563   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3564   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3565   maij = (Mat_MPIAIJ*)(*mat)->data;
3566 
3567   (*mat)->preallocated = PETSC_TRUE;
3568 
3569   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3570   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3571 
3572   /* Set A as diagonal portion of *mat */
3573   maij->A = A;
3574 
3575   nz = oi[m];
3576   for (i=0; i<nz; i++) {
3577     col   = oj[i];
3578     oj[i] = garray[col];
3579   }
3580 
3581    /* Set Bnew as off-diagonal portion of *mat */
3582   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3583   bnew        = (Mat_SeqAIJ*)Bnew->data;
3584   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3585   maij->B     = Bnew;
3586 
3587   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3588 
3589   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3590   b->free_a       = PETSC_FALSE;
3591   b->free_ij      = PETSC_FALSE;
3592   ierr = MatDestroy(&B);CHKERRQ(ierr);
3593 
3594   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3595   bnew->free_a       = PETSC_TRUE;
3596   bnew->free_ij      = PETSC_TRUE;
3597 
3598   /* condense columns of maij->B */
3599   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3600   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3601   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3602   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3603   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3604   PetscFunctionReturn(0);
3605 }
3606 
3607 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3608 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts a parallel submatrix in the case where
   the submatrix keeps the same row distribution as mat, so no rows move between processes.

   iscol_local is a sequential IS holding all requested column indices gathered on this
   process (it may be NULL on MAT_REUSE_MATRIX, where the saved objects are retrieved
   from compositions on *newmat). On MAT_INITIAL_MATRIX the intermediate objects
   ("SubMatrix", "SubIScol", "Subcmap", and optionally "ISAllGather") are composed on
   *newmat so a subsequent MAT_REUSE_MATRIX call can reuse them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects composed on *newmat by a previous MAT_INITIAL_MATRIX call.
       These are borrowed references; they are intentionally not destroyed here. */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* refill the numerical values of the saved sequential submatrix */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      /* no column selection needed: reuse iscol_local and use an identity column map */
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      /* iscol_sub  = those requested columns present in this process's diagonal or off-diagonal block
         iscmap     = for each kept column, its position (column index) in the final submatrix
         The sweep over garray with index k assumes garray is sorted ascending (like iscol_local). */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      /* PETSC_OWN_POINTER: the ISs take ownership of idx and cmap1 */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  /* cmap[c] maps column c of Msub to its global column index in the new matrix */
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum gives this process's ownership range [rstart,rend) of columns */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    /* dlens and olens share one allocation: olens aliases the second half */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns mapping outside [rstart,rend) land in the off-diagonal block */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  /* copy Msub row by row, translating its column indices through cmap */
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* composing keeps a reference, so the local ISDestroy/MatDestroy only drop ours */
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3817 
3818 /*
3819     Not great since it makes two copies of the submatrix, first an SeqAIJ
3820   in local and then by concatenating the local matrices the end result.
3821   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3822 
3823   Note: This requires a sequential iscol with all indices.
3824 */
/* csize is the requested local column size of the result, or PETSC_DECIDE.
   On MAT_INITIAL_MATRIX the sequential intermediate matrix is composed on the
   result under the name "SubMatrix" so a MAT_REUSE_MATRIX call can find it. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call ==  MAT_REUSE_MATRIX) {
    /* Mreuse is a borrowed reference from the composition made on the first call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum gives this process's ownership range [rstart,rend) of columns */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    /* dlens and olens share one allocation: olens aliases the second half */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns outside [rstart,rend) land in the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* copy the sequential submatrix into M row by row; column indices are already global */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* composing keeps a reference, so the MatDestroy only drops ours */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3946 
3947 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3948 {
3949   PetscInt       m,cstart, cend,j,nnz,i,d;
3950   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3951   const PetscInt *JJ;
3952   PetscErrorCode ierr;
3953   PetscBool      nooffprocentries;
3954 
3955   PetscFunctionBegin;
3956   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3957 
3958   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3959   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3960   m      = B->rmap->n;
3961   cstart = B->cmap->rstart;
3962   cend   = B->cmap->rend;
3963   rstart = B->rmap->rstart;
3964 
3965   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3966 
3967 #if defined(PETSC_USE_DEBUG)
3968   for (i=0; i<m; i++) {
3969     nnz = Ii[i+1]- Ii[i];
3970     JJ  = J + Ii[i];
3971     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3972     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3973     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3974   }
3975 #endif
3976 
3977   for (i=0; i<m; i++) {
3978     nnz     = Ii[i+1]- Ii[i];
3979     JJ      = J + Ii[i];
3980     nnz_max = PetscMax(nnz_max,nnz);
3981     d       = 0;
3982     for (j=0; j<nnz; j++) {
3983       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3984     }
3985     d_nnz[i] = d;
3986     o_nnz[i] = nnz - d;
3987   }
3988   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3989   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3990 
3991   for (i=0; i<m; i++) {
3992     ii   = i + rstart;
3993     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3994   }
3995   nooffprocentries    = B->nooffprocentries;
3996   B->nooffprocentries = PETSC_TRUE;
3997   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3998   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3999   B->nooffprocentries = nooffprocentries;
4000 
4001   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4002   PetscFunctionReturn(0);
4003 }
4004 
4005 /*@
4006    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4007    (the default parallel PETSc format).
4008 
4009    Collective
4010 
4011    Input Parameters:
4012 +  B - the matrix
4013 .  i - the indices into j for the start of each local row (starts with zero)
4014 .  j - the column indices for each local row (starts with zero)
4015 -  v - optional values in the matrix
4016 
4017    Level: developer
4018 
4019    Notes:
4020        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4021      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4022      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4023 
4024        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4025 
4026        The format which is used for the sparse matrix input, is equivalent to a
4027     row-major ordering.. i.e for the following matrix, the input data expected is
4028     as shown
4029 
4030 $        1 0 0
4031 $        2 0 3     P0
4032 $       -------
4033 $        4 5 6     P1
4034 $
4035 $     Process0 [P0]: rows_owned=[0,1]
4036 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4037 $        j =  {0,0,2}  [size = 3]
4038 $        v =  {1,2,3}  [size = 3]
4039 $
4040 $     Process1 [P1]: rows_owned=[2]
4041 $        i =  {0,3}    [size = nrow+1  = 1+1]
4042 $        j =  {0,1,2}  [size = 3]
4043 $        v =  {4,5,6}  [size = 3]
4044 
4045 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4046           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4047 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation registered on B (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod silently does nothing if B's type does not register this method. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4056 
4057 /*@C
4058    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4059    (the default parallel PETSc format).  For good matrix assembly performance
4060    the user should preallocate the matrix storage by setting the parameters
4061    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4062    performance can be increased by more than a factor of 50.
4063 
4064    Collective
4065 
4066    Input Parameters:
4067 +  B - the matrix
4068 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4069            (same value is used for all local rows)
4070 .  d_nnz - array containing the number of nonzeros in the various rows of the
4071            DIAGONAL portion of the local submatrix (possibly different for each row)
4072            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4073            The size of this array is equal to the number of local rows, i.e 'm'.
4074            For matrices that will be factored, you must leave room for (and set)
4075            the diagonal entry even if it is zero.
4076 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4077            submatrix (same value is used for all local rows).
4078 -  o_nnz - array containing the number of nonzeros in the various rows of the
4079            OFF-DIAGONAL portion of the local submatrix (possibly different for
4080            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4081            structure. The size of this array is equal to the number
4082            of local rows, i.e 'm'.
4083 
4084    If the *_nnz parameter is given then the *_nz parameter is ignored
4085 
4086    The AIJ format (also called the Yale sparse matrix format or
4087    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4088    storage.  The stored row and column indices begin with zero.
4089    See Users-Manual: ch_mat for details.
4090 
4091    The parallel matrix is partitioned such that the first m0 rows belong to
4092    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4093    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4094 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4101    common case of a square matrix, the row and column ranges are the same and
4102    the DIAGONAL part is also square. The remaining portion of the local
4103    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4104 
4105    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4106 
4107    You can call MatGetInfo() to get information on how effective the preallocation was;
4108    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4109    You can also run with the option -info and look for messages with the string
4110    malloc in them to see if additional memory allocation was needed.
4111 
4112    Example usage:
4113 
4114    Consider the following 8x8 matrix with 34 non-zero values, that is
4115    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4116    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4117    as follows:
4118 
4119 .vb
4120             1  2  0  |  0  3  0  |  0  4
4121     Proc0   0  5  6  |  7  0  0  |  8  0
4122             9  0 10  | 11  0  0  | 12  0
4123     -------------------------------------
4124            13  0 14  | 15 16 17  |  0  0
4125     Proc1   0 18  0  | 19 20 21  |  0  0
4126             0  0  0  | 22 23  0  | 24  0
4127     -------------------------------------
4128     Proc2  25 26 27  |  0  0 28  | 29  0
4129            30  0  0  | 31 32 33  |  0 34
4130 .ve
4131 
4132    This can be represented as a collection of submatrices as:
4133 
4134 .vb
4135       A B C
4136       D E F
4137       G H I
4138 .ve
4139 
4140    Where the submatrices A,B,C are owned by proc0, D,E,F are
4141    owned by proc1, G,H,I are owned by proc2.
4142 
4143    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4144    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4145    The 'M','N' parameters are 8,8, and have the same values on all procs.
4146 
4147    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4148    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4149    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4150    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4153 
4154    When d_nz, o_nz parameters are specified, d_nz storage elements are
4155    allocated for every row of the local diagonal submatrix, and o_nz
4156    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4159    In this case, the values of d_nz,o_nz are:
4160 .vb
4161      proc0 : dnz = 2, o_nz = 2
4162      proc1 : dnz = 3, o_nz = 2
4163      proc2 : dnz = 1, o_nz = 4
4164 .ve
4165    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4166    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4168    34 values.
4169 
4170    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4172    In the above case the values for d_nnz,o_nnz are:
4173 .vb
4174      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4175      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4176      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4177 .ve
4178    Here the space allocated is sum of all the above values i.e 34, and
4179    hence pre-allocation is perfect.
4180 
4181    Level: intermediate
4182 
4183 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4184           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4185 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate that B is a proper Mat whose type has already been set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the type-specific implementation; PetscTryMethod silently
     does nothing if B's type does not register this method. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4196 
4197 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format.
4200 
4201    Collective
4202 
4203    Input Parameters:
4204 +  comm - MPI communicator
4205 .  m - number of local rows (Cannot be PETSC_DECIDE)
4206 .  n - This value should be the same as the local size used in creating the
4207        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4208        calculated if N is given) For square matrices n is almost always m.
4209 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4210 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4211 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4212 .   j - column indices
4213 -   a - matrix values
4214 
4215    Output Parameter:
4216 .   mat - the matrix
4217 
4218    Level: intermediate
4219 
4220    Notes:
4221        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4222      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4223      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4224 
4225        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4226 
4227        The format which is used for the sparse matrix input, is equivalent to a
4228     row-major ordering.. i.e for the following matrix, the input data expected is
4229     as shown
4230 
4231        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4232 
4233 $        1 0 0
4234 $        2 0 3     P0
4235 $       -------
4236 $        4 5 6     P1
4237 $
4238 $     Process0 [P0]: rows_owned=[0,1]
4239 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4240 $        j =  {0,0,2}  [size = 3]
4241 $        v =  {1,2,3}  [size = 3]
4242 $
4243 $     Process1 [P1]: rows_owned=[2]
4244 $        i =  {0,3}    [size = nrow+1  = 1+1]
4245 $        j =  {0,1,2}  [size = 3]
4246 $        v =  {4,5,6}  [size = 3]
4247 
4248 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4249           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4250 @*/
4251 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4252 {
4253   PetscErrorCode ierr;
4254 
4255   PetscFunctionBegin;
4256   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4257   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4258   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4259   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4260   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4261   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4262   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
4265 
4266 /*@
4267      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4268          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4269 
4270    Collective
4271 
4272    Input Parameters:
4273 +  mat - the matrix
4274 .  m - number of local rows (Cannot be PETSC_DECIDE)
4275 .  n - This value should be the same as the local size used in creating the
4276        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4277        calculated if N is given) For square matrices n is almost always m.
4278 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4279 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4280 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4281 .  J - column indices
4282 -  v - matrix values
4283 
4284    Level: intermediate
4285 
4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4287           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4288 @*/
4289 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4290 {
4291   PetscErrorCode ierr;
4292   PetscInt       cstart,nnz,i,j;
4293   PetscInt       *ld;
4294   PetscBool      nooffprocentries;
4295   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4296   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4297   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4298   const PetscInt *Adi = Ad->i;
4299   PetscInt       ldi,Iii,md;
4300 
4301   PetscFunctionBegin;
4302   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4303   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4304   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4305   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4306 
4307   cstart = mat->cmap->rstart;
4308   if (!Aij->ld) {
4309     /* count number of entries below block diagonal */
4310     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4311     Aij->ld = ld;
4312     for (i=0; i<m; i++) {
4313       nnz  = Ii[i+1]- Ii[i];
4314       j     = 0;
4315       while  (J[j] < cstart && j < nnz) {j++;}
4316       J    += nnz;
4317       ld[i] = j;
4318     }
4319   } else {
4320     ld = Aij->ld;
4321   }
4322 
4323   for (i=0; i<m; i++) {
4324     nnz  = Ii[i+1]- Ii[i];
4325     Iii  = Ii[i];
4326     ldi  = ld[i];
4327     md   = Adi[i+1]-Adi[i];
4328     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4329     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4330     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4331     ad  += md;
4332     ao  += nnz - md;
4333   }
4334   nooffprocentries      = mat->nooffprocentries;
4335   mat->nooffprocentries = PETSC_TRUE;
4336   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4337   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4338   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4339   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4340   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4341   mat->nooffprocentries = nooffprocentries;
4342   PetscFunctionReturn(0);
4343 }
4344 
4345 /*@C
4346    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4347    (the default parallel PETSc format).  For good matrix assembly performance
4348    the user should preallocate the matrix storage by setting the parameters
4349    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4350    performance can be increased by more than a factor of 50.
4351 
4352    Collective
4353 
4354    Input Parameters:
4355 +  comm - MPI communicator
4356 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4357            This value should be the same as the local size used in creating the
4358            y vector for the matrix-vector product y = Ax.
4359 .  n - This value should be the same as the local size used in creating the
4360        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4361        calculated if N is given) For square matrices n is almost always m.
4362 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4363 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4364 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4365            (same value is used for all local rows)
4366 .  d_nnz - array containing the number of nonzeros in the various rows of the
4367            DIAGONAL portion of the local submatrix (possibly different for each row)
4368            or NULL, if d_nz is used to specify the nonzero structure.
4369            The size of this array is equal to the number of local rows, i.e 'm'.
4370 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4371            submatrix (same value is used for all local rows).
4372 -  o_nnz - array containing the number of nonzeros in the various rows of the
4373            OFF-DIAGONAL portion of the local submatrix (possibly different for
4374            each row) or NULL, if o_nz is used to specify the nonzero
4375            structure. The size of this array is equal to the number
4376            of local rows, i.e 'm'.
4377 
4378    Output Parameter:
4379 .  A - the matrix
4380 
4381    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4382    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4383    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4384 
4385    Notes:
4386    If the *_nnz parameter is given then the *_nz parameter is ignored
4387 
4388    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4389    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4390    storage requirements for this matrix.
4391 
4392    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4393    processor then it must be used on all processors that share the object for
4394    that argument.
4395 
4396    The user MUST specify either the local or global matrix dimensions
4397    (possibly both).
4398 
4399    The parallel matrix is partitioned across processors such that the
4400    first m0 rows belong to process 0, the next m1 rows belong to
4401    process 1, the next m2 rows belong to process 2 etc.. where
4402    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4403    values corresponding to [m x N] submatrix.
4404 
4405    The columns are logically partitioned with the n0 columns belonging
4406    to 0th partition, the next n1 columns belonging to the next
4407    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4408 
4409    The DIAGONAL portion of the local submatrix on any given processor
4410    is the submatrix corresponding to the rows and columns m,n
4411    corresponding to the given processor. i.e diagonal matrix on
4412    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4413    etc. The remaining portion of the local submatrix [m x (N-n)]
4414    constitute the OFF-DIAGONAL portion. The example below better
4415    illustrates this concept.
4416 
4417    For a square global matrix we define each processor's diagonal portion
4418    to be its local rows and the corresponding columns (a square submatrix);
4419    each processor's off-diagonal portion encompasses the remainder of the
4420    local matrix (a rectangular submatrix).
4421 
4422    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4423 
4424    When calling this routine with a single process communicator, a matrix of
4425    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4426    type of communicator, use the construction mechanism
4427 .vb
4428      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4429 .ve
4430 
4431 $     MatCreate(...,&A);
4432 $     MatSetType(A,MATMPIAIJ);
4433 $     MatSetSizes(A, m,n,M,N);
4434 $     MatMPIAIJSetPreallocation(A,...);
4435 
4436    By default, this format uses inodes (identical nodes) when possible.
4437    We search for consecutive rows with the same nonzero structure, thereby
4438    reusing matrix information to achieve increased efficiency.
4439 
4440    Options Database Keys:
4441 +  -mat_no_inode  - Do not use inodes
4442 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4443 
4444 
4445 
4446    Example usage:
4447 
4448    Consider the following 8x8 matrix with 34 non-zero values, that is
4449    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4450    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4451    as follows
4452 
4453 .vb
4454             1  2  0  |  0  3  0  |  0  4
4455     Proc0   0  5  6  |  7  0  0  |  8  0
4456             9  0 10  | 11  0  0  | 12  0
4457     -------------------------------------
4458            13  0 14  | 15 16 17  |  0  0
4459     Proc1   0 18  0  | 19 20 21  |  0  0
4460             0  0  0  | 22 23  0  | 24  0
4461     -------------------------------------
4462     Proc2  25 26 27  |  0  0 28  | 29  0
4463            30  0  0  | 31 32 33  |  0 34
4464 .ve
4465 
4466    This can be represented as a collection of submatrices as
4467 
4468 .vb
4469       A B C
4470       D E F
4471       G H I
4472 .ve
4473 
4474    Where the submatrices A,B,C are owned by proc0, D,E,F are
4475    owned by proc1, G,H,I are owned by proc2.
4476 
4477    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4478    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479    The 'M','N' parameters are 8,8, and have the same values on all procs.
4480 
4481    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4482    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4483    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4484    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4485    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4486    matrix, and [DF] as another SeqAIJ matrix.
4487 
4488    When d_nz, o_nz parameters are specified, d_nz storage elements are
4489    allocated for every row of the local diagonal submatrix, and o_nz
4490    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4491    One way to choose d_nz and o_nz is to use the max number of nonzeros per local
4492    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4493    In this case, the values of d_nz,o_nz are
4494 .vb
4495      proc0 : dnz = 2, o_nz = 2
4496      proc1 : dnz = 3, o_nz = 2
4497      proc2 : dnz = 1, o_nz = 4
4498 .ve
4499    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4500    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4501    for proc2. i.e we are using 12+15+10=37 storage locations to store
4502    34 values.
4503 
4504    When d_nnz, o_nnz parameters are specified, the storage is specified
4505    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4506    In the above case the values for d_nnz,o_nnz are
4507 .vb
4508      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4509      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4510      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4511 .ve
4512    Here the space allocated is sum of all the above values i.e 34, and
4513    hence pre-allocation is perfect.
4514 
4515    Level: intermediate
4516 
4517 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4518           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4519 @*/
4520 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4521 {
4522   PetscErrorCode ierr;
4523   PetscMPIInt    size;
4524 
4525   PetscFunctionBegin;
4526   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4527   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4529   if (size > 1) {
4530     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4531     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4532   } else {
4533     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4534     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4535   }
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4540 {
4541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4542   PetscBool      flg;
4543   PetscErrorCode ierr;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4547   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4548   if (Ad)     *Ad     = a->A;
4549   if (Ao)     *Ao     = a->B;
4550   if (colmap) *colmap = a->garray;
4551   PetscFunctionReturn(0);
4552 }
4553 
/* Concatenates each process's sequential matrix inmat (row-wise) into one parallel AIJ
   matrix *outmat on comm; n is the local column count (or PETSC_DECIDE).
   With MAT_INITIAL_MATRIX the pattern is analyzed and *outmat created; with reuse only
   the numeric phase runs, so the pattern of inmat must be unchanged. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* exclusive prefix sum of the local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count per-row nonzeros split into diagonal/off-diagonal parts for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    /* MATAIJ resolves to SEQAIJ or MPIAIJ at runtime, so both preallocation variants are
       invoked; only the one matching the actual type takes effect */
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4605 
4606 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4607 {
4608   PetscErrorCode    ierr;
4609   PetscMPIInt       rank;
4610   PetscInt          m,N,i,rstart,nnz;
4611   size_t            len;
4612   const PetscInt    *indx;
4613   PetscViewer       out;
4614   char              *name;
4615   Mat               B;
4616   const PetscScalar *values;
4617 
4618   PetscFunctionBegin;
4619   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4620   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4621   /* Should this be the type of the diagonal block of A? */
4622   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4623   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4624   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4625   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4626   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4627   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4628   for (i=0; i<m; i++) {
4629     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4630     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4631     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4632   }
4633   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4634   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4635 
4636   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4637   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4638   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4639   sprintf(name,"%s.%d",outfile,rank);
4640   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4641   ierr = PetscFree(name);CHKERRQ(ierr);
4642   ierr = MatView(B,out);CHKERRQ(ierr);
4643   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4644   ierr = MatDestroy(&B);CHKERRQ(ierr);
4645   PetscFunctionReturn(0);
4646 }
4647 
4648 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4649 {
4650   PetscErrorCode      ierr;
4651   Mat_Merge_SeqsToMPI *merge;
4652   PetscContainer      container;
4653 
4654   PetscFunctionBegin;
4655   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4656   if (container) {
4657     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4658     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4665     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4666     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4667     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4668     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4669     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4670     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4671     ierr = PetscFree(merge);CHKERRQ(ierr);
4672     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4673   }
4674   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4675   PetscFunctionReturn(0);
4676 }
4677 
4678 #include <../src/mat/utils/freespace.h>
4679 #include <petscbt.h>
4680 
/* Numeric phase of merging per-process sequential AIJ matrices into one parallel matrix:
   using the communication pattern stored by the symbolic phase (in the "MatMergeSeqsToMPI"
   container), exchanges matrix values and accumulates them into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merge structure produced by MatCreateMPIAIJSumSeqAIJSymbolic() */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values destined for [proc] are the contiguous run of seqmat rows it owns */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge aj (sorted) into bj_i (a superset, also sorted): advance j until columns match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4797 
4798 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4799 {
4800   PetscErrorCode      ierr;
4801   Mat                 B_mpi;
4802   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4803   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4804   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4805   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4806   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4807   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4808   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4809   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4810   MPI_Status          *status;
4811   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4812   PetscBT             lnkbt;
4813   Mat_Merge_SeqsToMPI *merge;
4814   PetscContainer      container;
4815 
4816   PetscFunctionBegin;
4817   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4818 
4819   /* make sure it is a PETSc comm */
4820   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4822   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4823 
4824   ierr = PetscNew(&merge);CHKERRQ(ierr);
4825   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4826 
4827   /* determine row ownership */
4828   /*---------------------------------------------------------*/
4829   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4830   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4831   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4832   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4833   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4834   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4835   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4836 
4837   m      = merge->rowmap->n;
4838   owners = merge->rowmap->range;
4839 
4840   /* determine the number of messages to send, their lengths */
4841   /*---------------------------------------------------------*/
4842   len_s = merge->len_s;
4843 
4844   len          = 0; /* length of buf_si[] */
4845   merge->nsend = 0;
4846   for (proc=0; proc<size; proc++) {
4847     len_si[proc] = 0;
4848     if (proc == rank) {
4849       len_s[proc] = 0;
4850     } else {
4851       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4852       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4853     }
4854     if (len_s[proc]) {
4855       merge->nsend++;
4856       nrows = 0;
4857       for (i=owners[proc]; i<owners[proc+1]; i++) {
4858         if (ai[i+1] > ai[i]) nrows++;
4859       }
4860       len_si[proc] = 2*(nrows+1);
4861       len         += len_si[proc];
4862     }
4863   }
4864 
4865   /* determine the number and length of messages to receive for ij-structure */
4866   /*-------------------------------------------------------------------------*/
4867   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4868   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4869 
4870   /* post the Irecv of j-structure */
4871   /*-------------------------------*/
4872   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4873   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4874 
4875   /* post the Isend of j-structure */
4876   /*--------------------------------*/
4877   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4878 
4879   for (proc=0, k=0; proc<size; proc++) {
4880     if (!len_s[proc]) continue;
4881     i    = owners[proc];
4882     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4883     k++;
4884   }
4885 
4886   /* receives and sends of j-structure are complete */
4887   /*------------------------------------------------*/
4888   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4889   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4890 
4891   /* send and recv i-structure */
4892   /*---------------------------*/
4893   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4894   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4895 
4896   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4897   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4898   for (proc=0,k=0; proc<size; proc++) {
4899     if (!len_s[proc]) continue;
4900     /* form outgoing message for i-structure:
4901          buf_si[0]:                 nrows to be sent
4902                [1:nrows]:           row index (global)
4903                [nrows+1:2*nrows+1]: i-structure index
4904     */
4905     /*-------------------------------------------*/
4906     nrows       = len_si[proc]/2 - 1;
4907     buf_si_i    = buf_si + nrows+1;
4908     buf_si[0]   = nrows;
4909     buf_si_i[0] = 0;
4910     nrows       = 0;
4911     for (i=owners[proc]; i<owners[proc+1]; i++) {
4912       anzi = ai[i+1] - ai[i];
4913       if (anzi) {
4914         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4915         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4916         nrows++;
4917       }
4918     }
4919     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4920     k++;
4921     buf_si += len_si[proc];
4922   }
4923 
4924   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4925   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4926 
4927   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4928   for (i=0; i<merge->nrecv; i++) {
4929     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4930   }
4931 
4932   ierr = PetscFree(len_si);CHKERRQ(ierr);
4933   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4934   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4935   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4936   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4937   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4938   ierr = PetscFree(status);CHKERRQ(ierr);
4939 
4940   /* compute a local seq matrix in each processor */
4941   /*----------------------------------------------*/
4942   /* allocate bi array and free space for accumulating nonzero column info */
4943   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4944   bi[0] = 0;
4945 
4946   /* create and initialize a linked list */
4947   nlnk = N+1;
4948   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4949 
4950   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4951   len  = ai[owners[rank+1]] - ai[owners[rank]];
4952   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4953 
4954   current_space = free_space;
4955 
4956   /* determine symbolic info for each local row */
4957   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4958 
4959   for (k=0; k<merge->nrecv; k++) {
4960     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4961     nrows       = *buf_ri_k[k];
4962     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4963     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4964   }
4965 
4966   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4967   len  = 0;
4968   for (i=0; i<m; i++) {
4969     bnzi = 0;
4970     /* add local non-zero cols of this proc's seqmat into lnk */
4971     arow  = owners[rank] + i;
4972     anzi  = ai[arow+1] - ai[arow];
4973     aj    = a->j + ai[arow];
4974     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4975     bnzi += nlnk;
4976     /* add received col data into lnk */
4977     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4978       if (i == *nextrow[k]) { /* i-th row */
4979         anzi  = *(nextai[k]+1) - *nextai[k];
4980         aj    = buf_rj[k] + *nextai[k];
4981         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4982         bnzi += nlnk;
4983         nextrow[k]++; nextai[k]++;
4984       }
4985     }
4986     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4987 
4988     /* if free space is not available, make more free space */
4989     if (current_space->local_remaining<bnzi) {
4990       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4991       nspacedouble++;
4992     }
4993     /* copy data into free space, then initialize lnk */
4994     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4995     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4996 
4997     current_space->array           += bnzi;
4998     current_space->local_used      += bnzi;
4999     current_space->local_remaining -= bnzi;
5000 
5001     bi[i+1] = bi[i] + bnzi;
5002   }
5003 
5004   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5005 
5006   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5007   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5008   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5009 
5010   /* create symbolic parallel matrix B_mpi */
5011   /*---------------------------------------*/
5012   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5013   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5014   if (n==PETSC_DECIDE) {
5015     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5016   } else {
5017     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5018   }
5019   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5020   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5021   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5022   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5023   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5024 
5025   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5026   B_mpi->assembled    = PETSC_FALSE;
5027   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5028   merge->bi           = bi;
5029   merge->bj           = bj;
5030   merge->buf_ri       = buf_ri;
5031   merge->buf_rj       = buf_rj;
5032   merge->coi          = NULL;
5033   merge->coj          = NULL;
5034   merge->owners_co    = NULL;
5035 
5036   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5037 
5038   /* attach the supporting struct to B_mpi for reuse */
5039   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5040   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5041   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5042   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5043   *mpimat = B_mpi;
5044 
5045   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5046   PetscFunctionReturn(0);
5047 }
5048 
5049 /*@C
5050       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5051                  matrices from each processor
5052 
5053     Collective
5054 
5055    Input Parameters:
+    comm - the communicator the parallel matrix will live on
5057 .    seqmat - the input sequential matrices
5058 .    m - number of local rows (or PETSC_DECIDE)
5059 .    n - number of local columns (or PETSC_DECIDE)
5060 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5061 
5062    Output Parameter:
5063 .    mpimat - the parallel matrix generated
5064 
5065     Level: advanced
5066 
5067    Notes:
5068      The dimensions of the sequential matrix in each processor MUST be the same.
5069      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5070      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5071 @*/
5072 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5073 {
5074   PetscErrorCode ierr;
5075   PetscMPIInt    size;
5076 
5077   PetscFunctionBegin;
5078   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5079   if (size == 1) {
5080     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5081     if (scall == MAT_INITIAL_MATRIX) {
5082       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5083     } else {
5084       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5085     }
5086     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5087     PetscFunctionReturn(0);
5088   }
5089   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5090   if (scall == MAT_INITIAL_MATRIX) {
5091     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5092   }
5093   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5094   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5095   PetscFunctionReturn(0);
5096 }
5097 
5098 /*@
5099      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5100           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5101           with MatGetSize()
5102 
5103     Not Collective
5104 
5105    Input Parameters:
5106 +    A - the matrix
5107 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5108 
5109    Output Parameter:
5110 .    A_loc - the local sequential matrix generated
5111 
5112     Level: developer
5113 
5114    Notes:
5115      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5116      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5117      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5118      modify the values of the returned A_loc.
5119 
5120 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5121 
5122 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray; /* cmap: compressed off-diag column -> global column */
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscMPIInt    size;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  /* Accept MATMPIAIJ and its subclasses (any type name that begins with "mpiaij") */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* Sequential case: the diagonal block IS the whole local matrix; return a
       reference (INITIAL) or copy values into the caller's matrix (REUSE) */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data; /* diagonal block (columns local to this rank) */
  b = (Mat_SeqAIJ*)(mpimat->B)->data; /* off-diagonal block (compressed column space) */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row's nnz is the sum of its diag and off-diag nnz */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    /* Merge each row in ascending global column order: off-diag columns left of the
       diagonal block (< cstart), then the diagonal block, then the remaining off-diag
       columns. Assumes garray is sorted -- TODO confirm (standard for MPIAIJ). */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A (global columns < cstart) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local column indices by cstart to get global ones) */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (global columns >= cstart, continuing from jo) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Same traversal as above, but only the numerical values are rewritten;
       the nonzero pattern of *A_loc is assumed unchanged */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5216 
5217 /*@C
5218      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5219 
5220     Not Collective
5221 
5222    Input Parameters:
5223 +    A - the matrix
5224 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5225 -    row, col - index sets of rows and columns to extract (or NULL)
5226 
5227    Output Parameter:
5228 .    A_loc - the local sequential matrix generated
5229 
5230     Level: developer
5231 
5232 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5233 
5234 @*/
5235 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5236 {
5237   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5238   PetscErrorCode ierr;
5239   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5240   IS             isrowa,iscola;
5241   Mat            *aloc;
5242   PetscBool      match;
5243 
5244   PetscFunctionBegin;
5245   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5246   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5247   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5248   if (!row) {
5249     start = A->rmap->rstart; end = A->rmap->rend;
5250     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5251   } else {
5252     isrowa = *row;
5253   }
5254   if (!col) {
5255     start = A->cmap->rstart;
5256     cmap  = a->garray;
5257     nzA   = a->A->cmap->n;
5258     nzB   = a->B->cmap->n;
5259     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5260     ncols = 0;
5261     for (i=0; i<nzB; i++) {
5262       if (cmap[i] < start) idx[ncols++] = cmap[i];
5263       else break;
5264     }
5265     imark = i;
5266     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5267     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5268     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5269   } else {
5270     iscola = *col;
5271   }
5272   if (scall != MAT_INITIAL_MATRIX) {
5273     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5274     aloc[0] = *A_loc;
5275   }
5276   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5277   if (!col) { /* attach global id of condensed columns */
5278     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5279   }
5280   *A_loc = aloc[0];
5281   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5282   if (!row) {
5283     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5284   }
5285   if (!col) {
5286     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5287   }
5288   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5289   PetscFunctionReturn(0);
5290 }
5291 
5292 /*
5293  * Destroy a mat that may be composed with PetscSF communication objects.
5294  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5295  * */
5296 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5297 {
5298   PetscSF          sf,osf;
5299   IS               map;
5300   PetscErrorCode   ierr;
5301 
5302   PetscFunctionBegin;
5303   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5304   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5305   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5306   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5307   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5308   ierr = ISDestroy(&map);CHKERRQ(ierr);
5309   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 /*
5314  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5316  * on a global size.
5317  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, possibly owned by other ranks)
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* Root-side per-row data, stored as (diag,off-diag) pairs: nnz counts and running offsets */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  /* Totals of nonzeros to receive (diag/off-diag split) and the longest row (ncol) */
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Entry-level SF graphs: one leaf per received nonzero, built separately for diag and off-diag */
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so ilocal needs to point into the single contiguous arrays;
         ntotalcols interleaves diag and off-diag entries row by row */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (in place; undone below after the Bcast completes) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* We want P_oth to store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
  nout = 0;
  /* Restore the off-diag column indices of P (undo the local-to-global conversion above);
     every index must map back, otherwise the mapping is inconsistent */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  /* ``New MatDestroy" takes care of PetscSF objects as well */
  (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
  PetscFunctionReturn(0);
}
5482 
5483 /*
5484  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5485  * This supports MPIAIJ and MAIJ
5486  * */
5487 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5488 {
5489   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5490   Mat_SeqAIJ            *p_oth;
5491   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5492   IS                    rows,map;
5493   PetscHMapI            hamp;
5494   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5495   MPI_Comm              comm;
5496   PetscSF               sf,osf;
5497   PetscBool             has;
5498   PetscErrorCode        ierr;
5499 
5500   PetscFunctionBegin;
5501   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5502   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5503   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5504    *  and then create a submatrix (that often is an overlapping matrix)
5505    * */
5506   if (reuse==MAT_INITIAL_MATRIX) {
5507     /* Use a hash table to figure out unique keys */
5508     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5509     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5510     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5511     count = 0;
5512     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5513     for (i=0;i<a->B->cmap->n;i++) {
5514       key  = a->garray[i]/dof;
5515       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5516       if (!has) {
5517         mapping[i] = count;
5518         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5519       } else {
5520         /* Current 'i' has the same value the previous step */
5521         mapping[i] = count-1;
5522       }
5523     }
5524     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5525     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5526     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5527     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5528     off = 0;
5529     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5530     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5531     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5532     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5533     /* In case, the matrix was already created but users want to recreate the matrix */
5534     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5535     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5536     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5537     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5538   } else if (reuse==MAT_REUSE_MATRIX) {
5539     /* If matrix was already created, we simply update values using SF objects
5540      * that as attached to the matrix ealier.
5541      *  */
5542     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5543     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5544     if (!sf || !osf) {
5545       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n");
5546     }
5547     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5548     /* Update values in place */
5549     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5550     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5551     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5552     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5553   } else {
5554     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n");
5555   }
5556   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5557   PetscFunctionReturn(0);
5558 }
5559 
5560 /*@C
5561     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5562 
5563     Collective on Mat
5564 
5565    Input Parameters:
5566 +    A,B - the matrices in mpiaij format
5567 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5568 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5569 
5570    Output Parameter:
5571 +    rowb, colb - index sets of rows and columns of B to extract
5572 -    B_seq - the sequential matrix generated
5573 
5574     Level: developer
5575 
5576 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* The layouts must conform: local columns of A must align with local rows of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS for B: all nonzero global columns of local A in ascending order.
       garray holds global off-diagonal columns; the diagonal block contributes
       start..start+nzA-1. All columns of B are taken (stride IS below). */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    /* Reuse path: the ISs produced by the MAT_INITIAL_MATRIX call must be supplied,
       and MatCreateSubMatrices() expects a caller-allocated array holding the old matrix */
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Hand the ISs back to the caller for reuse, or destroy them if not requested */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5629 
5630 /*
5631     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5632     of the OFF-DIAGONAL portion of local A
5633 
5634     Collective on Mat
5635 
5636    Input Parameters:
5637 +    A,B - the matrices in mpiaij format
5638 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5639 
5640    Output Parameter:
5641 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5642 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5643 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5644 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5645 
    Developer Note: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5648 
5649     Level: developer
5650 
5651 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;                  /* implementation data of the result *B_oth */
  VecScatter             ctx;                     /* A's mat-vec scatter; supplies the communication pattern reused here */
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs;         /* ranks we receive from / send to */
  const PetscInt         *srow,*rstarts,*sstarts; /* local rows to send, and per-message offsets for recvs/sends */
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             rstatus;
  PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* A's column layout must match B's row layout for the product to make sense.
     NOTE(review): %d is used for PetscInt here; elsewhere PETSc uses %D for 64-bit-index safety -- confirm */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    /* serial: there is no off-diagonal part, hence nothing to gather */
    /* NOTE(review): the two assignments below write to the local parameter copies only and
       have no effect on the caller; presumably *startsj_s / *bufa_ptr were intended (or the
       assignments are dead) -- confirm. startsj_r is not touched at all on this path. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
  ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
  ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);

  /* without the saved communication buffers a reuse is impossible, so force a full rebuild */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array: exchange row lengths so each process can size the CSR row pointer of B_oth */
    /*---------*/
    /*  post receives */
    if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message: for every requested row, its length in B */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth: prefix sums over the received row lengths */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array: exchange the global column indices of the requested rows */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    /* reuse path: the message layouts and send buffer were saved on the initial call,
       so only the numerical values need to be exchanged */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); /* NOTE(review): message looks copy-pasted; scall is simply invalid here */

  /* a-array: exchange the numerical values (done on both initial and reuse calls) */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      /* caller does not want the reuse buffers; release them */
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); /* NOTE(review): frees the caller-supplied pointer-to-pointer, not the bufa allocated above; looks like it should be PetscFree(bufa) (bufa leaks here) -- confirm */
    } else {
      /* hand the layout arrays and value buffer back to the caller for MAT_REUSE_MATRIX */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5871 
5872 /*@C
5873   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5874 
5875   Not Collective
5876 
5877   Input Parameters:
5878 . A - The matrix in mpiaij format
5879 
  Output Parameters:
5881 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5882 . colmap - A map from global column index to local index into lvec
5883 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5884 
5885   Level: developer
5886 
5887 @*/
5888 #if defined(PETSC_USE_CTABLE)
5889 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5890 #else
5891 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5892 #endif
5893 {
5894   Mat_MPIAIJ *a;
5895 
5896   PetscFunctionBegin;
5897   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5898   PetscValidPointer(lvec, 2);
5899   PetscValidPointer(colmap, 3);
5900   PetscValidPointer(multScatter, 4);
5901   a = (Mat_MPIAIJ*) A->data;
5902   if (lvec) *lvec = a->lvec;
5903   if (colmap) *colmap = a->colmap;
5904   if (multScatter) *multScatter = a->Mvctx;
5905   PetscFunctionReturn(0);
5906 }
5907 
5908 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5911 #if defined(PETSC_HAVE_MKL_SPARSE)
5912 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5913 #endif
5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5916 #if defined(PETSC_HAVE_ELEMENTAL)
5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5918 #endif
5919 #if defined(PETSC_HAVE_HYPRE)
5920 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5921 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5922 #endif
5923 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5925 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5926 
5927 /*
5928     Computes (B'*A')' since computing B*A directly is untenable
5929 
5930                n                       p                          p
5931         (              )       (              )         (                  )
5932       m (      A       )  *  n (       B      )   =   m (         C        )
5933         (              )       (              )         (                  )
5934 
5935 */
5936 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5937 {
5938   PetscErrorCode ierr;
5939   Mat            At,Bt,Ct;
5940 
5941   PetscFunctionBegin;
5942   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5943   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5944   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5945   ierr = MatDestroy(&At);CHKERRQ(ierr);
5946   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5947   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5948   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5949   PetscFunctionReturn(0);
5950 }
5951 
5952 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5953 {
5954   PetscErrorCode ierr;
5955   PetscInt       m=A->rmap->n,n=B->cmap->n;
5956   Mat            Cmat;
5957 
5958   PetscFunctionBegin;
5959   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5960   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5961   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5962   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5963   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5964   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5965   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5966   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5967 
5968   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5969 
5970   *C = Cmat;
5971   PetscFunctionReturn(0);
5972 }
5973 
5974 /* ----------------------------------------------------------------*/
/* Driver for C = A*B (A MPIDense, B MPIAIJ): symbolic phase on first call, numeric phase always */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* on MAT_INITIAL_MATRIX create C via the symbolic phase; on MAT_REUSE_MATRIX the caller's C is reused */
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  /* the numeric phase runs in both cases */
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5990 
5991 /*MC
5992    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5993 
5994    Options Database Keys:
5995 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5996 
5997    Level: beginner
5998 
5999    Notes:
6000     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6001     in this case the values associated with the rows and columns one passes in are set to zero
6002     in the matrix
6003 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6006 
6007 .seealso: MatCreateAIJ()
6008 M*/
6009 
/* Type constructor for MATMPIAIJ: allocates the implementation struct, installs the
   operations table, and registers the composed (queryable-by-name) methods and converters */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  /* allocate the implementation struct and copy in the MPIAIJ function table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register composed methods and type converters, discoverable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6082 
6083 /*@C
6084      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6085          and "off-diagonal" part of the matrix in CSR format.
6086 
6087    Collective
6088 
6089    Input Parameters:
6090 +  comm - MPI communicator
6091 .  m - number of local rows (Cannot be PETSC_DECIDE)
6092 .  n - This value should be the same as the local size used in creating the
6093        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6094        calculated if N is given) For square matrices n is almost always m.
6095 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6096 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6097 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6098 .   j - column indices
6099 .   a - matrix values
6100 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6101 .   oj - column indices
6102 -   oa - matrix values
6103 
6104    Output Parameter:
6105 .   mat - the matrix
6106 
6107    Level: advanced
6108 
6109    Notes:
6110        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6111        must free the arrays once the matrix has been destroyed and not before.
6112 
6113        The i and j indices are 0 based
6114 
6115        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6116 
6117        This sets local rows and cannot be used to set off-processor values.
6118 
6119        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6120        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6121        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6122        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6123        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6124        communication if it is known that only local entries will be set.
6125 
6126 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6127           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6128 @*/
6129 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6130 {
6131   PetscErrorCode ierr;
6132   Mat_MPIAIJ     *maij;
6133 
6134   PetscFunctionBegin;
6135   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6136   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6137   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6138   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6139   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6140   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6141   maij = (Mat_MPIAIJ*) (*mat)->data;
6142 
6143   (*mat)->preallocated = PETSC_TRUE;
6144 
6145   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6146   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6147 
6148   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6149   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6150 
6151   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6152   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6153   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6154   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6155 
6156   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6157   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6158   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6159   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6160   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6161   PetscFunctionReturn(0);
6162 }
6163 
6164 /*
6165     Special version for direct calls from Fortran
6166 */
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function: the Fortran-callable wrapper
   below returns void, so the standard CHKERRQ/SETERRQ (which execute "return ierr")
   cannot be used; redefine them to abort instead */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

/* map the C symbol to the Fortran compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/* Fortran-callable fast path of MatSetValues() for MPIAIJ matrices: inlines the SeqAIJ
   insertion macros for the diagonal (A) and off-diagonal (B) blocks. Returns void, so
   errors abort via the CHKERRQ/SETERRQ overrides defined earlier in this file. */
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped, matching MatSetValues() */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for both the A and B blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* v is row-major or column-major depending on the matrix's roworiented flag */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            /* column in the off-diagonal block: translate global column to local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* new off-diagonal column: disassemble so the entry can be inserted */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
6297