xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5c65b9ec7a426b386fab06d5b4773e7c8ee3a336)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 #include <petsc/private/hashseti.h>
10 
11 /*MC
12    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
13 
14    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
15    and MATMPIAIJ otherwise.  As a result, for single process communicators,
16  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
17  for communicators controlling multiple processes.  It is recommended that you call both of
18  the above preallocation routines for simplicity, as in the example below.
19 
20    Options Database Keys:
21 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
22 
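  Example (an illustrative sketch; the global size 100 and the per-row nonzero
  estimates 5 and 2 are arbitrary):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
  The preallocation routine that does not match the communicator size is simply ignored.
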
23   Developer Notes:
24     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
25    switches over to use inodes when enough exist.
26 
27   Level: beginner
28 
29 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
30 M*/
31 
32 /*MC
33    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
34 
35    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
36    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
37    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39  the above preallocation routines for simplicity, as in the example below.
40 
41    Options Database Keys:
42 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
43 
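  Example (an illustrative sketch; sizes and nonzero estimates are arbitrary):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJCRL);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
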
44   Level: beginner
45 
46 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
47 M*/
48 
49 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
50 {
51   PetscErrorCode ierr;
52   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
53 
54   PetscFunctionBegin;
55   if (mat->A) {
56     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
57     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
58   }
59   PetscFunctionReturn(0);
60 }
61 
62 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
63 {
64   PetscErrorCode  ierr;
65   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
66   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
67   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
68   const PetscInt  *ia,*ib;
69   const MatScalar *aa,*bb;
70   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
71   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
72 
73   PetscFunctionBegin;
74   *keptrows = 0;
75   ia        = a->i;
76   ib        = b->i;
77   for (i=0; i<m; i++) {
78     na = ia[i+1] - ia[i];
79     nb = ib[i+1] - ib[i];
80     if (!na && !nb) {
81       cnt++;
82       goto ok1;
83     }
84     aa = a->a + ia[i];
85     for (j=0; j<na; j++) {
86       if (aa[j] != 0.0) goto ok1;
87     }
88     bb = b->a + ib[i];
89     for (j=0; j <nb; j++) {
90       if (bb[j] != 0.0) goto ok1;
91     }
92     cnt++;
93 ok1:;
94   }
95   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
96   if (!n0rows) PetscFunctionReturn(0);
97   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
98   cnt  = 0;
99   for (i=0; i<m; i++) {
100     na = ia[i+1] - ia[i];
101     nb = ib[i+1] - ib[i];
102     if (!na && !nb) continue;
103     aa = a->a + ia[i];
104     for (j=0; j<na;j++) {
105       if (aa[j] != 0.0) {
106         rows[cnt++] = rstart + i;
107         goto ok2;
108       }
109     }
110     bb = b->a + ib[i];
111     for (j=0; j<nb; j++) {
112       if (bb[j] != 0.0) {
113         rows[cnt++] = rstart + i;
114         goto ok2;
115       }
116     }
117 ok2:;
118   }
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
124 {
125   PetscErrorCode    ierr;
126   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
127   PetscBool         cong;
128 
129   PetscFunctionBegin;
130   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
131   if (Y->assembled && cong) {
132     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
133   } else {
134     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
135   }
136   PetscFunctionReturn(0);
137 }
138 
139 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
140 {
141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
142   PetscErrorCode ierr;
143   PetscInt       i,rstart,nrows,*rows;
144 
145   PetscFunctionBegin;
146   *zrows = NULL;
147   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
148   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
149   for (i=0; i<nrows; i++) rows[i] += rstart;
150   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
151   PetscFunctionReturn(0);
152 }
153 
154 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
155 {
156   PetscErrorCode ierr;
157   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
158   PetscInt       i,n,*garray = aij->garray;
159   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
160   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
161   PetscReal      *work;
162 
163   PetscFunctionBegin;
164   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
165   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
168       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
169     }
170     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
171       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
172     }
173   } else if (type == NORM_1) {
174     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
175       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
176     }
177     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
178       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
179     }
180   } else if (type == NORM_INFINITY) {
181     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
182       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
183     }
184     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
185       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
186     }
187 
188   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
189   if (type == NORM_INFINITY) {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   } else {
192     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
193   }
194   ierr = PetscFree(work);CHKERRQ(ierr);
195   if (type == NORM_2) {
196     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
197   }
198   PetscFunctionReturn(0);
199 }
200 
201 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
202 {
203   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
204   IS              sis,gis;
205   PetscErrorCode  ierr;
206   const PetscInt  *isis,*igis;
207   PetscInt        n,*iis,nsis,ngis,rstart,i;
208 
209   PetscFunctionBegin;
210   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
211   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
212   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
213   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
214   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
215   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
216 
217   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
218   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
219   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
220   n    = ngis + nsis;
221   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
222   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
223   for (i=0; i<n; i++) iis[i] += rstart;
224   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
225 
226   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
227   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
228   ierr = ISDestroy(&sis);CHKERRQ(ierr);
229   ierr = ISDestroy(&gis);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 /*
234     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
235     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
236 
237     Only for square matrices
238 
239     Used by a preconditioner, hence PETSC_EXTERN
240 */
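/*
   Usage sketch (illustrative): the numerical values come from the copy of gmat on
   process 0, and m is the number of rows this process is to own in the result dmat.

     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
     ... modify the entries of gmat on process 0 ...
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);
*/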
241 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
242 {
243   PetscMPIInt    rank,size;
244   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
245   PetscErrorCode ierr;
246   Mat            mat;
247   Mat_SeqAIJ     *gmata;
248   PetscMPIInt    tag;
249   MPI_Status     status;
250   PetscBool      aij;
251   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
252 
253   PetscFunctionBegin;
254   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
255   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
256   if (!rank) {
257     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
258     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
259   }
260   if (reuse == MAT_INITIAL_MATRIX) {
261     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
262     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
263     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
264     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
265     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
266     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
267     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
268     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
269     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
270 
271     rowners[0] = 0;
272     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
273     rstart = rowners[rank];
274     rend   = rowners[rank+1];
275     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
276     if (!rank) {
277       gmata = (Mat_SeqAIJ*) gmat->data;
278       /* send row lengths to all processors */
279       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
280       for (i=1; i<size; i++) {
281         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
282       }
283       /* determine the number of diagonal and off-diagonal entries in each row */
284       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
285       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
286       jj   = 0;
287       for (i=0; i<m; i++) {
288         for (j=0; j<dlens[i]; j++) {
289           if (gmata->j[jj] < rstart) ld[i]++;
290           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
291           jj++;
292         }
293       }
294       /* send column indices to other processes */
295       for (i=1; i<size; i++) {
296         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
297         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300 
301       /* send numerical values to other processes */
302       for (i=1; i<size; i++) {
303         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
304         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
305       }
306       gmataa = gmata->a;
307       gmataj = gmata->j;
308 
309     } else {
310       /* receive row lengths */
311       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       /* receive column indices */
313       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
315       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
316       /* determine the number of diagonal and off-diagonal entries in each row */
317       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
318       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
319       jj   = 0;
320       for (i=0; i<m; i++) {
321         for (j=0; j<dlens[i]; j++) {
322           if (gmataj[jj] < rstart) ld[i]++;
323           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
324           jj++;
325         }
326       }
327       /* receive numerical values */
328       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
329       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
330     }
331     /* set preallocation */
332     for (i=0; i<m; i++) {
333       dlens[i] -= olens[i];
334     }
335     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
336     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
337 
338     for (i=0; i<m; i++) {
339       dlens[i] += olens[i];
340     }
341     cnt = 0;
342     for (i=0; i<m; i++) {
343       row  = rstart + i;
344       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
345       cnt += dlens[i];
346     }
347     if (rank) {
348       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
349     }
350     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
351     ierr = PetscFree(rowners);CHKERRQ(ierr);
352 
353     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
354 
355     *inmat = mat;
356   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
357     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
358     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
359     mat  = *inmat;
360     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
361     if (!rank) {
362       /* send numerical values to other processes */
363       gmata  = (Mat_SeqAIJ*) gmat->data;
364       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
365       gmataa = gmata->a;
366       for (i=1; i<size; i++) {
367         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
368         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
369       }
370       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
371     } else {
372       /* receive numerical values from process 0 */
373       nz   = Ad->nz + Ao->nz;
374       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
375       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
376     }
377     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
378     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
379     ad = Ad->a;
380     ao = Ao->a;
381     if (mat->rmap->n) {
382       i  = 0;
383       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
384       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
385     }
386     for (i=1; i<mat->rmap->n; i++) {
387       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
388       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
389     }
390     i--;
391     if (mat->rmap->n) {
392       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
393     }
394     if (rank) {
395       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
396     }
397   }
398   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
400   PetscFunctionReturn(0);
401 }
402 
403 /*
404   Local utility routine that creates a mapping from the global column
405 number to the local number in the off-diagonal part of the local
406 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
407 a slightly higher hash table cost; without it, it is not scalable (each process
408 has an order-N integer array) but is fast to access.
409 */
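/*
   Lookup sketch (this mirrors how the map is consulted later in this file): given a
   global column gcol, the corresponding local off-diagonal column lcol is

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   where lcol == -1 indicates that gcol does not occur in the off-diagonal part.
*/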
410 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       n = aij->B->cmap->n,i;
415 
416   PetscFunctionBegin;
417   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
418 #if defined(PETSC_USE_CTABLE)
419   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   for (i=0; i<n; i++) {
421     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
422   }
423 #else
424   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
425   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
426   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
427 #endif
428   PetscFunctionReturn(0);
429 }
430 
431 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
432 { \
433     if (col <= lastcol1)  low1 = 0;     \
434     else                 high1 = nrow1; \
435     lastcol1 = col;\
436     while (high1-low1 > 5) { \
437       t = (low1+high1)/2; \
438       if (rp1[t] > col) high1 = t; \
439       else              low1  = t; \
440     } \
441       for (_i=low1; _i<high1; _i++) { \
442         if (rp1[_i] > col) break; \
443         if (rp1[_i] == col) { \
444           if (addv == ADD_VALUES) { \
445             ap1[_i] += value;   \
446             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
447             (void)PetscLogFlops(1.0);   \
448            } \
449           else                    ap1[_i] = value; \
450           goto a_noinsert; \
451         } \
452       }  \
453       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
454       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
455       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
456       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
457       N = nrow1++ - 1; a->nz++; high1++; \
458       /* shift up all the later entries in this row */ \
459       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
460       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
461       rp1[_i] = col;  \
462       ap1[_i] = value;  \
463       A->nonzerostate++;\
464       a_noinsert: ; \
465       ailen[row] = nrow1; \
466 }
467 
468 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
469   { \
470     if (col <= lastcol2) low2 = 0;                        \
471     else high2 = nrow2;                                   \
472     lastcol2 = col;                                       \
473     while (high2-low2 > 5) {                              \
474       t = (low2+high2)/2;                                 \
475       if (rp2[t] > col) high2 = t;                        \
476       else             low2  = t;                         \
477     }                                                     \
478     for (_i=low2; _i<high2; _i++) {                       \
479       if (rp2[_i] > col) break;                           \
480       if (rp2[_i] == col) {                               \
481         if (addv == ADD_VALUES) {                         \
482           ap2[_i] += value;                               \
483           (void)PetscLogFlops(1.0);                       \
484         }                                                 \
485         else                    ap2[_i] = value;          \
486         goto b_noinsert;                                  \
487       }                                                   \
488     }                                                     \
489     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
490     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
491     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
492     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
493     N = nrow2++ - 1; b->nz++; high2++;                    \
494     /* shift up all the later entries in this row */      \
495     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
496     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
497     rp2[_i] = col;                                        \
498     ap2[_i] = value;                                      \
499     B->nonzerostate++;                                    \
500     b_noinsert: ;                                         \
501     bilen[row] = nrow2;                                   \
502   }
503 
504 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
505 {
506   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
507   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
508   PetscErrorCode ierr;
509   PetscInt       l,*garray = mat->garray,diag;
510 
511   PetscFunctionBegin;
512   /* this code only works for square matrices A; v contains the entire numerical row in order of increasing global column */
513 
514   /* find size of row to the left of the diagonal part */
515   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
516   row  = row - diag;
517   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
518     if (garray[b->j[b->i[row]+l]] > diag) break;
519   }
520   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
521 
522   /* diagonal part */
523   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
524 
525   /* right of diagonal part */
526   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
527   PetscFunctionReturn(0);
528 }
529 
530 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
531 {
532   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
533   PetscScalar    value = 0.0;
534   PetscErrorCode ierr;
535   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
536   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
537   PetscBool      roworiented = aij->roworiented;
538 
539   /* Some Variables required in the macro */
540   Mat        A                 = aij->A;
541   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
542   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
543   MatScalar  *aa               = a->a;
544   PetscBool  ignorezeroentries = a->ignorezeroentries;
545   Mat        B                 = aij->B;
546   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
547   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
548   MatScalar  *ba               = b->a;
549 
550   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
551   PetscInt  nonew;
552   MatScalar *ap1,*ap2;
553 
554   PetscFunctionBegin;
555   for (i=0; i<m; i++) {
556     if (im[i] < 0) continue;
557 #if defined(PETSC_USE_DEBUG)
558     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
559 #endif
560     if (im[i] >= rstart && im[i] < rend) {
561       row      = im[i] - rstart;
562       lastcol1 = -1;
563       rp1      = aj + ai[row];
564       ap1      = aa + ai[row];
565       rmax1    = aimax[row];
566       nrow1    = ailen[row];
567       low1     = 0;
568       high1    = nrow1;
569       lastcol2 = -1;
570       rp2      = bj + bi[row];
571       ap2      = ba + bi[row];
572       rmax2    = bimax[row];
573       nrow2    = bilen[row];
574       low2     = 0;
575       high2    = nrow2;
576 
577       for (j=0; j<n; j++) {
578         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
583           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
584         } else if (in[j] < 0) continue;
585 #if defined(PETSC_USE_DEBUG)
586         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
587 #endif
588         else {
589           if (mat->was_assembled) {
590             if (!aij->colmap) {
591               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
592             }
593 #if defined(PETSC_USE_CTABLE)
594             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
595             col--;
596 #else
597             col = aij->colmap[in[j]] - 1;
598 #endif
599             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
600               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
601               col  =  in[j];
602               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
603               B     = aij->B;
604               b     = (Mat_SeqAIJ*)B->data;
605               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
606               rp2   = bj + bi[row];
607               ap2   = ba + bi[row];
608               rmax2 = bimax[row];
609               nrow2 = bilen[row];
610               low2  = 0;
611               high2 = nrow2;
612               bm    = aij->B->rmap->n;
613               ba    = b->a;
614             } else if (col < 0) {
615               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
616                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
617               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
618             }
619           } else col = in[j];
620           nonew = b->nonew;
621           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
622         }
623       }
624     } else {
625       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
626       if (!aij->donotstash) {
627         mat->assembled = PETSC_FALSE;
628         if (roworiented) {
629           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
630         } else {
631           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
632         }
633       }
634     }
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 /*
640     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
641     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
642     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
643 */
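/*
   Illustration (a hypothetical two-row slab with cstart=0 and cend=2):
     mat_i = {0,2,4}, mat_j = {0,3,1,2}
   Row 0: column 0 falls in [cstart,cend) so aj gets 0-cstart = 0; column 3 is off-diagonal so bj gets 3.
   Row 1: column 1 goes to aj as 1; column 2 goes to bj as 2.
   The result is ailen = {1,1} and bilen = {1,1}.
*/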
644 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
645 {
646   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
647   Mat            A           = aij->A; /* diagonal part of the matrix */
648   Mat            B           = aij->B; /* offdiagonal part of the matrix */
649   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
650   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
651   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
652   PetscInt       *ailen      = a->ilen,*aj = a->j;
653   PetscInt       *bilen      = b->ilen,*bj = b->j;
654   PetscInt       am          = aij->A->rmap->n,j;
655   PetscInt       diag_so_far = 0,dnz;
656   PetscInt       offd_so_far = 0,onz;
657 
658   PetscFunctionBegin;
659   /* Iterate over all rows of the matrix */
660   for (j=0; j<am; j++) {
661     dnz = onz = 0;
662     /*  Iterate over all non-zero columns of the current row */
663     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
664       /* If column is in the diagonal */
665       if (mat_j[col] >= cstart && mat_j[col] < cend) {
666         aj[diag_so_far++] = mat_j[col] - cstart;
667         dnz++;
668       } else { /* off-diagonal entries */
669         bj[offd_so_far++] = mat_j[col];
670         onz++;
671       }
672     }
673     ailen[j] = dnz;
674     bilen[j] = onz;
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 /*
680     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
681     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
682     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
683     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
684     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
685 */
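/*
   Continuing the illustration above with mat_a = {10,40,20,30}, this routine also
   stores the numerical values: aa = {10,20} in the diagonal block and ba = {40,30}
   in the off-diagonal block (assuming full_diag_i and full_offd_i leave no gaps).
*/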
686 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
687 {
688   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
689   Mat            A      = aij->A; /* diagonal part of the matrix */
690   Mat            B      = aij->B; /* offdiagonal part of the matrix */
691   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
692   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
693   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
694   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
695   PetscInt       *ailen = a->ilen,*aj = a->j;
696   PetscInt       *bilen = b->ilen,*bj = b->j;
697   PetscInt       am     = aij->A->rmap->n,j;
698   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
699   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
700   PetscScalar    *aa = a->a,*ba = b->a;
701 
702   PetscFunctionBegin;
703   /* Iterate over all rows of the matrix */
704   for (j=0; j<am; j++) {
705     dnz_row = onz_row = 0;
706     rowstart_offd = full_offd_i[j];
707     rowstart_diag = full_diag_i[j];
708     /*  Iterate over all non-zero columns of the current row */
709     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
710       /* If column is in the diagonal */
711       if (mat_j[col] >= cstart && mat_j[col] < cend) {
712         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
713         aa[rowstart_diag+dnz_row] = mat_a[col];
714         dnz_row++;
715       } else { /* off-diagonal entries */
716         bj[rowstart_offd+onz_row] = mat_j[col];
717         ba[rowstart_offd+onz_row] = mat_a[col];
718         onz_row++;
719       }
720     }
721     ailen[j] = dnz_row;
722     bilen[j] = onz_row;
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
728 {
729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
730   PetscErrorCode ierr;
731   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
732   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
733 
734   PetscFunctionBegin;
735   for (i=0; i<m; i++) {
736     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
737     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
738     if (idxm[i] >= rstart && idxm[i] < rend) {
739       row = idxm[i] - rstart;
740       for (j=0; j<n; j++) {
741         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
742         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
743         if (idxn[j] >= cstart && idxn[j] < cend) {
744           col  = idxn[j] - cstart;
745           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
746         } else {
747           if (!aij->colmap) {
748             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
749           }
750 #if defined(PETSC_USE_CTABLE)
751           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
752           col--;
753 #else
754           col = aij->colmap[idxn[j]] - 1;
755 #endif
756           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
757           else {
758             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
759           }
760         }
761       }
762     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
763   }
764   PetscFunctionReturn(0);
765 }
766 
767 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
768 
769 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
770 {
771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
772   PetscErrorCode ierr;
773   PetscInt       nstash,reallocs;
774 
775   PetscFunctionBegin;
776   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
777 
778   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
779   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
780   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
784 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
785 {
786   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
787   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
788   PetscErrorCode ierr;
789   PetscMPIInt    n;
790   PetscInt       i,j,rstart,ncols,flg;
791   PetscInt       *row,*col;
792   PetscBool      other_disassembled;
793   PetscScalar    *val;
794 
795   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
796 
797   PetscFunctionBegin;
798   if (!aij->donotstash && !mat->nooffprocentries) {
799     while (1) {
800       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
801       if (!flg) break;
802 
803       for (i=0; i<n; ) {
804         /* Now identify the consecutive vals belonging to the same row */
805         for (j=i,rstart=row[j]; j<n; j++) {
806           if (row[j] != rstart) break;
807         }
808         if (j < n) ncols = j-i;
809         else       ncols = n-i;
810         /* Now assemble all these values with a single function call */
811         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
812 
813         i = j;
814       }
815     }
816     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
817   }
818 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
819   if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU) aij->A->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
820 #endif
821   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
822   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
823 
824   /* determine if any processor has disassembled, if so we must
825      also disassemble ourselves, in order that we may reassemble. */
826   /*
827      if nonzero structure of submatrix B cannot change then we know that
828      no processor disassembled, and thus we can skip this step
829   */
830   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
831     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
832     if (mat->was_assembled && !other_disassembled) {
833 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
834       aij->B->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
835 #endif
836       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
837     }
838   }
839   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
840     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
841   }
842   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
843 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
844   if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU && aij->B->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) aij->B->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
845 #endif
846   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
847   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
848 
849   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
850 
851   aij->rowvalues = 0;
852 
853   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
854   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
855 
856   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
857   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
858     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
859     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
860   }
861 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
862   mat->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
863 #endif
864   PetscFunctionReturn(0);
865 }
866 
867 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
868 {
869   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
870   PetscErrorCode ierr;
871 
872   PetscFunctionBegin;
873   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
874   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
879 {
880   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
881   PetscObjectState sA, sB;
882   PetscInt        *lrows;
883   PetscInt         r, len;
884   PetscBool        cong, lch, gch;
885   PetscErrorCode   ierr;
886 
887   PetscFunctionBegin;
888   /* get locally owned rows */
889   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
890   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
891   /* fix right hand side if needed */
892   if (x && b) {
893     const PetscScalar *xx;
894     PetscScalar       *bb;
895 
896     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
897     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
898     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
899     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
900     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
901     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
902   }
903 
904   sA = mat->A->nonzerostate;
905   sB = mat->B->nonzerostate;
906 
907   if (diag != 0.0 && cong) {
908     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
909     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
910   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
911     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
912     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
913     PetscInt   nnwA, nnwB;
914     PetscBool  nnzA, nnzB;
915 
916     nnwA = aijA->nonew;
917     nnwB = aijB->nonew;
918     nnzA = aijA->keepnonzeropattern;
919     nnzB = aijB->keepnonzeropattern;
920     if (!nnzA) {
921       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
922       aijA->nonew = 0;
923     }
924     if (!nnzB) {
925       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
926       aijB->nonew = 0;
927     }
928     /* Must zero here before the next loop */
929     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
930     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) {
932       const PetscInt row = lrows[r] + A->rmap->rstart;
933       if (row >= A->cmap->N) continue;
934       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
935     }
936     aijA->nonew = nnwA;
937     aijB->nonew = nnwB;
938   } else {
939     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
940     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
941   }
942   ierr = PetscFree(lrows);CHKERRQ(ierr);
943   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
944   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
945 
946   /* reduce nonzerostate */
947   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
948   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
949   if (gch) A->nonzerostate++;
950   PetscFunctionReturn(0);
951 }
952 
953 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
954 {
955   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode    ierr;
957   PetscMPIInt       n = A->rmap->n;
958   PetscInt          i,j,r,m,p = 0,len = 0;
959   PetscInt          *lrows,*owners = A->rmap->range;
960   PetscSFNode       *rrows;
961   PetscSF           sf;
962   const PetscScalar *xx;
963   PetscScalar       *bb,*mask;
964   Vec               xmask,lmask;
965   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
966   const PetscInt    *aj, *ii,*ridx;
967   PetscScalar       *aa;
968 
969   PetscFunctionBegin;
970   /* Create SF where leaves are input rows and roots are owned rows */
971   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
972   for (r = 0; r < n; ++r) lrows[r] = -1;
973   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
974   for (r = 0; r < N; ++r) {
975     const PetscInt idx   = rows[r];
976     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
977     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
978       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
979     }
980     rrows[r].rank  = p;
981     rrows[r].index = rows[r] - owners[p];
982   }
983   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
984   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
985   /* Collect flags for rows to be zeroed */
986   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
987   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
988   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
989   /* Compress and put in row numbers */
990   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
991   /* zero diagonal part of matrix */
992   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
993   /* handle off diagonal part of matrix */
994   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
995   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
996   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
997   for (i=0; i<len; i++) bb[lrows[i]] = 1;
998   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
999   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1000   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1002   if (x && b) { /* this code is buggy when the row and column layouts don't match */
1003     PetscBool cong;
1004 
1005     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1006     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1007     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1010     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1011   }
1012   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1013   /* remove zeroed rows of off diagonal matrix */
1014   ii = aij->i;
1015   for (i=0; i<len; i++) {
1016     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1017   }
1018   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1019   if (aij->compressedrow.use) {
1020     m    = aij->compressedrow.nrows;
1021     ii   = aij->compressedrow.i;
1022     ridx = aij->compressedrow.rindex;
1023     for (i=0; i<m; i++) {
1024       n  = ii[i+1] - ii[i];
1025       aj = aij->j + ii[i];
1026       aa = aij->a + ii[i];
1027 
1028       for (j=0; j<n; j++) {
1029         if (PetscAbsScalar(mask[*aj])) {
1030           if (b) bb[*ridx] -= *aa*xx[*aj];
1031           *aa = 0.0;
1032         }
1033         aa++;
1034         aj++;
1035       }
1036       ridx++;
1037     }
1038   } else { /* do not use compressed row format */
1039     m = l->B->rmap->n;
1040     for (i=0; i<m; i++) {
1041       n  = ii[i+1] - ii[i];
1042       aj = aij->j + ii[i];
1043       aa = aij->a + ii[i];
1044       for (j=0; j<n; j++) {
1045         if (PetscAbsScalar(mask[*aj])) {
1046           if (b) bb[i] -= *aa*xx[*aj];
1047           *aa = 0.0;
1048         }
1049         aa++;
1050         aj++;
1051       }
1052     }
1053   }
1054   if (x && b) {
1055     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1056     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1057   }
1058   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1059   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1060   ierr = PetscFree(lrows);CHKERRQ(ierr);
1061 
1062   /* only change matrix nonzero state if pattern was allowed to be changed */
1063   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1064     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1065     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1071 {
1072   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1073   PetscErrorCode ierr;
1074   PetscInt       nt;
1075   VecScatter     Mvctx = a->Mvctx;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1079   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1080 
1081   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1083   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102   VecScatter     Mvctx = a->Mvctx;
1103 
1104   PetscFunctionBegin;
1105   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1106   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1108   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1114 {
1115   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1116   PetscErrorCode ierr;
1117 
1118   PetscFunctionBegin;
1119   /* do nondiagonal part */
1120   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1121   /* do local part */
1122   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1123   /* add partial results together */
1124   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1125   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1130 {
1131   MPI_Comm       comm;
1132   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1133   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1134   IS             Me,Notme;
1135   PetscErrorCode ierr;
1136   PetscInt       M,N,first,last,*notme,i;
1137   PetscBool      lf;
1138   PetscMPIInt    size;
1139 
1140   PetscFunctionBegin;
1141   /* Easy test: symmetric diagonal block */
1142   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1143   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1144   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1145   if (!*f) PetscFunctionReturn(0);
1146   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1147   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1148   if (size == 1) PetscFunctionReturn(0);
1149 
1150   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1151   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1152   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1153   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1154   for (i=0; i<first; i++) notme[i] = i;
1155   for (i=last; i<M; i++) notme[i-last+first] = i;
1156   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1157   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1158   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1159   Aoff = Aoffs[0];
1160   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1161   Boff = Boffs[0];
1162   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1163   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1164   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1165   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1166   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1167   ierr = PetscFree(notme);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1172 {
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1181 {
1182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   /* do nondiagonal part */
1187   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1188   /* do local part */
1189   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1190   /* add partial results together */
1191   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1192   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 /*
1197   This only works correctly for square matrices where the subblock A->A is the
1198    diagonal block
1199 */
1200 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1201 {
1202   PetscErrorCode ierr;
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204 
1205   PetscFunctionBegin;
1206   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1207   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1208   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1219   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1224 {
1225   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229 #if defined(PETSC_USE_LOG)
1230   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1231 #endif
1232   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1233   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1234   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1235   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1236 #if defined(PETSC_USE_CTABLE)
1237   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1238 #else
1239   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1240 #endif
1241   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1242   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1243   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1244   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1245   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1246   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1247   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1248 
1249   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1252   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1256   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1257   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1258 #if defined(PETSC_HAVE_ELEMENTAL)
1259   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1260 #endif
1261 #if defined(PETSC_HAVE_HYPRE)
1262   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1263   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1264 #endif
1265   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1266   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1274   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1275   PetscErrorCode ierr;
1276   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1277   int            fd;
1278   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1279   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1280   PetscScalar    *column_values;
1281   PetscInt       message_count,flowcontrolcount;
1282   FILE           *file;
1283 
1284   PetscFunctionBegin;
1285   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1286   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1287   nz   = A->nz + B->nz;
1288   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1289   if (!rank) {
1290     header[0] = MAT_FILE_CLASSID;
1291     header[1] = mat->rmap->N;
1292     header[2] = mat->cmap->N;
1293 
1294     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1296     /* get largest number of rows any processor has */
1297     rlen  = mat->rmap->n;
1298     range = mat->rmap->range;
1299     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1300   } else {
1301     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     rlen = mat->rmap->n;
1303   }
1304 
1305   /* load up the local row counts */
1306   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1307   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1308 
1309   /* store the row lengths to the file */
1310   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1311   if (!rank) {
1312     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     for (i=1; i<size; i++) {
1314       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1315       rlen = range[i+1] - range[i];
1316       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1318     }
1319     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1320   } else {
1321     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1322     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1323     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1324   }
1325   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1326 
1327   /* load up the local column indices */
1328   nzmax = nz; /* the root process needs as much scratch space as the largest process needs */
1329   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1330   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1331   cnt   = 0;
1332   for (i=0; i<mat->rmap->n; i++) {
1333     for (j=B->i[i]; j<B->i[i+1]; j++) {
1334       if ((col = garray[B->j[j]]) > cstart) break;
1335       column_indices[cnt++] = col;
1336     }
1337     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1338     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1339   }
1340   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1341 
1342   /* store the column indices to the file */
1343   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1344   if (!rank) {
1345     MPI_Status status;
1346     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1347     for (i=1; i<size; i++) {
1348       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1349       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1350       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1351       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1352       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1353     }
1354     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1355   } else {
1356     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1357     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1358     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1359     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1360   }
1361   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1362 
1363   /* load up the local column values */
1364   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1365   cnt  = 0;
1366   for (i=0; i<mat->rmap->n; i++) {
1367     for (j=B->i[i]; j<B->i[i+1]; j++) {
1368       if (garray[B->j[j]] > cstart) break;
1369       column_values[cnt++] = B->a[j];
1370     }
1371     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1372     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1373   }
1374   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1375 
1376   /* store the column values to the file */
1377   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1378   if (!rank) {
1379     MPI_Status status;
1380     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1381     for (i=1; i<size; i++) {
1382       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1383       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1384       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1385       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1386       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1387     }
1388     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1389   } else {
1390     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1391     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1393     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1394   }
1395   ierr = PetscFree(column_values);CHKERRQ(ierr);
1396 
1397   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1398   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1399   PetscFunctionReturn(0);
1400 }
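
/*
   Usage sketch for the binary format written above (hypothetical caller code; the
   file name and the matrices A and B are placeholders). The file holds a four-entry
   header (MAT_FILE_CLASSID, global rows, global columns, total nonzeros), then all
   row lengths, all global column indices, and finally all values:

     PetscViewer viewer;
     Mat         B;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/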
1401 
1402 #include <petscdraw.h>
1403 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1404 {
1405   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1406   PetscErrorCode    ierr;
1407   PetscMPIInt       rank = aij->rank,size = aij->size;
1408   PetscBool         isdraw,iascii,isbinary;
1409   PetscViewer       sviewer;
1410   PetscViewerFormat format;
1411 
1412   PetscFunctionBegin;
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1415   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1416   if (iascii) {
1417     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1418     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1419       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1420       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1421       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1422       for (i=0; i<(PetscInt)size; i++) {
1423         nmax = PetscMax(nmax,nz[i]);
1424         nmin = PetscMin(nmin,nz[i]);
1425         navg += nz[i];
1426       }
1427       ierr = PetscFree(nz);CHKERRQ(ierr);
1428       navg = navg/size;
1429       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1430       PetscFunctionReturn(0);
1431     }
1432     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1433     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1434       MatInfo   info;
1435       PetscBool inodes;
1436 
1437       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1438       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1439       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1440       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1441       if (!inodes) {
1442         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1443                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1444       } else {
1445         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1446                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1447       }
1448       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1449       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1450       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1451       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1452       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1454       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1455       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1456       PetscFunctionReturn(0);
1457     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1458       PetscInt inodecount,inodelimit,*inodes;
1459       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1460       if (inodes) {
1461         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1462       } else {
1463         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1464       }
1465       PetscFunctionReturn(0);
1466     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1467       PetscFunctionReturn(0);
1468     }
1469   } else if (isbinary) {
1470     if (size == 1) {
1471       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1472       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1473     } else {
1474       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1475     }
1476     PetscFunctionReturn(0);
1477   } else if (iascii && size == 1) {
1478     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1479     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1480     PetscFunctionReturn(0);
1481   } else if (isdraw) {
1482     PetscDraw draw;
1483     PetscBool isnull;
1484     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1485     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1486     if (isnull) PetscFunctionReturn(0);
1487   }
1488 
1489   { /* assemble the entire matrix onto first processor */
1490     Mat A = NULL, Av;
1491     IS  isrow,iscol;
1492 
1493     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1494     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1495     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1496     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1497 /*  The commented code uses MatCreateSubMatrices instead */
1498 /*
1499     Mat *AA, A = NULL, Av;
1500     IS  isrow,iscol;
1501 
1502     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1503     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1504     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1505     if (!rank) {
1506        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1507        A    = AA[0];
1508        Av   = AA[0];
1509     }
1510     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1511 */
1512     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1513     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1514     /*
1515        Everyone has to call to draw the matrix since the graphics waits are
1516        synchronized across all processors that share the PetscDraw object
1517     */
1518     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1519     if (!rank) {
1520       if (((PetscObject)mat)->name) {
1521         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1522       }
1523       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1524     }
1525     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1526     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1527     ierr = MatDestroy(&A);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1533 {
1534   PetscErrorCode ierr;
1535   PetscBool      iascii,isdraw,issocket,isbinary;
1536 
1537   PetscFunctionBegin;
1538   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1539   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1540   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1541   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1542   if (iascii || isdraw || isbinary || issocket) {
1543     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1544   }
1545   PetscFunctionReturn(0);
1546 }
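
/*
   Usage sketch (hypothetical caller code; A is a placeholder): the
   PETSC_VIEWER_LOAD_BALANCE format handled in MatView_MPIAIJ_ASCIIorDraworSocket()
   can be requested programmatically,

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_LOAD_BALANCE);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, assuming the standard format name, from the command line with
   -mat_view ::load_balance.
*/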
1547 
1548 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1549 {
1550   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1551   PetscErrorCode ierr;
1552   Vec            bb1 = 0;
1553   PetscBool      hasop;
1554 
1555   PetscFunctionBegin;
1556   if (flag == SOR_APPLY_UPPER) {
1557     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1558     PetscFunctionReturn(0);
1559   }
1560 
1561   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1562     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1563   }
1564 
1565   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1566     if (flag & SOR_ZERO_INITIAL_GUESS) {
1567       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1568       its--;
1569     }
1570 
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1599     if (flag & SOR_ZERO_INITIAL_GUESS) {
1600       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1601       its--;
1602     }
1603     while (its--) {
1604       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606 
1607       /* update rhs: bb1 = bb - B*x */
1608       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1609       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1610 
1611       /* local sweep */
1612       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1613     }
1614   } else if (flag & SOR_EISENSTAT) {
1615     Vec xx1;
1616 
1617     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1618     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1619 
1620     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1621     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1622     if (!mat->diag) {
1623       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1624       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1625     }
1626     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1627     if (hasop) {
1628       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1629     } else {
1630       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1631     }
1632     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1633 
1634     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1635 
1636     /* local sweep */
1637     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1638     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1639     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1640   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1641 
1642   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1643 
1644   matin->factorerrortype = mat->A->factorerrortype;
1645   PetscFunctionReturn(0);
1646 }
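
/*
   Usage sketch (hypothetical caller code; A, b, x are placeholders): the local
   sweeps above, which form bb1 = bb - B*lvec from the scattered ghost values and
   then run SOR on the diagonal block, are normally reached through PCSOR rather
   than by calling this routine directly,

     KSP ksp;
     PC  pc;

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/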
1647 
1648 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1649 {
1650   Mat            aA,aB,Aperm;
1651   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1652   PetscScalar    *aa,*ba;
1653   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1654   PetscSF        rowsf,sf;
1655   IS             parcolp = NULL;
1656   PetscBool      done;
1657   PetscErrorCode ierr;
1658 
1659   PetscFunctionBegin;
1660   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1661   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1664 
1665   /* Invert row permutation to find out where my rows should go */
1666   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1667   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1668   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1669   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1670   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1671   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1672 
1673   /* Invert column permutation to find out where my columns should go */
1674   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1675   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1676   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1677   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1678   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1679   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1680   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1681 
1682   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1683   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1684   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1685 
1686   /* Find out where my gcols should go */
1687   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1688   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1689   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1690   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1691   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1692   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1693   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1694   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1695 
1696   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1697   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1698   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1699   for (i=0; i<m; i++) {
1700     PetscInt row = rdest[i],rowner;
1701     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1702     for (j=ai[i]; j<ai[i+1]; j++) {
1703       PetscInt cowner,col = cdest[aj[j]];
1704       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1705       if (rowner == cowner) dnnz[i]++;
1706       else onnz[i]++;
1707     }
1708     for (j=bi[i]; j<bi[i+1]; j++) {
1709       PetscInt cowner,col = gcdest[bj[j]];
1710       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1711       if (rowner == cowner) dnnz[i]++;
1712       else onnz[i]++;
1713     }
1714   }
1715   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1716   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1717   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1718   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1719   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1720 
1721   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1722   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1723   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1724   for (i=0; i<m; i++) {
1725     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1726     PetscInt j0,rowlen;
1727     rowlen = ai[i+1] - ai[i];
1728     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1729       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1730       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1731     }
1732     rowlen = bi[i+1] - bi[i];
1733     for (j0=j=0; j<rowlen; j0=j) {
1734       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1735       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1736     }
1737   }
1738   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1739   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1740   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1741   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1742   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1743   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1744   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1745   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1746   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1747   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1748   *B = Aperm;
1749   PetscFunctionReturn(0);
1750 }
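
/*
   Usage sketch (hypothetical caller code; m, n, rowidx, colidx are placeholder
   sizes and arrays): rowp and colp are parallel index sets describing the row and
   column permutations,

     IS  rowp,colp;
     Mat Aperm;

     ierr = ISCreateGeneral(PETSC_COMM_WORLD,m,rowidx,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,colidx,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/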
1751 
1752 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1753 {
1754   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1759   if (ghosts) *ghosts = aij->garray;
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1764 {
1765   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1766   Mat            A    = mat->A,B = mat->B;
1767   PetscErrorCode ierr;
1768   PetscReal      isend[5],irecv[5];
1769 
1770   PetscFunctionBegin;
1771   info->block_size = 1.0;
1772   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1773 
1774   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1775   isend[3] = info->memory;  isend[4] = info->mallocs;
1776 
1777   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1778 
1779   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1780   isend[3] += info->memory;  isend[4] += info->mallocs;
1781   if (flag == MAT_LOCAL) {
1782     info->nz_used      = isend[0];
1783     info->nz_allocated = isend[1];
1784     info->nz_unneeded  = isend[2];
1785     info->memory       = isend[3];
1786     info->mallocs      = isend[4];
1787   } else if (flag == MAT_GLOBAL_MAX) {
1788     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1789 
1790     info->nz_used      = irecv[0];
1791     info->nz_allocated = irecv[1];
1792     info->nz_unneeded  = irecv[2];
1793     info->memory       = irecv[3];
1794     info->mallocs      = irecv[4];
1795   } else if (flag == MAT_GLOBAL_SUM) {
1796     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1797 
1798     info->nz_used      = irecv[0];
1799     info->nz_allocated = irecv[1];
1800     info->nz_unneeded  = irecv[2];
1801     info->memory       = irecv[3];
1802     info->mallocs      = irecv[4];
1803   }
1804   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1805   info->fill_ratio_needed = 0;
1806   info->factor_mallocs    = 0;
1807   PetscFunctionReturn(0);
1808 }
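
/*
   Usage sketch (hypothetical caller code; A is a placeholder): aggregating the
   per-process counts accumulated above over the whole communicator,

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/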
1809 
1810 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1811 {
1812   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1813   PetscErrorCode ierr;
1814 
1815   PetscFunctionBegin;
1816   switch (op) {
1817   case MAT_NEW_NONZERO_LOCATIONS:
1818   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1819   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1820   case MAT_KEEP_NONZERO_PATTERN:
1821   case MAT_NEW_NONZERO_LOCATION_ERR:
1822   case MAT_USE_INODES:
1823   case MAT_IGNORE_ZERO_ENTRIES:
1824     MatCheckPreallocated(A,1);
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1827     break;
1828   case MAT_ROW_ORIENTED:
1829     MatCheckPreallocated(A,1);
1830     a->roworiented = flg;
1831 
1832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1833     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_NEW_DIAGONALS:
1836   case MAT_SORTED_FULL:
1837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1838     break;
1839   case MAT_IGNORE_OFF_PROC_ENTRIES:
1840     a->donotstash = flg;
1841     break;
1842   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1843   case MAT_SPD:
1844   case MAT_SYMMETRIC:
1845   case MAT_STRUCTURALLY_SYMMETRIC:
1846   case MAT_HERMITIAN:
1847   case MAT_SYMMETRY_ETERNAL:
1848     break;
1849   case MAT_SUBMAT_SINGLEIS:
1850     A->submat_singleis = flg;
1851     break;
1852   case MAT_STRUCTURE_ONLY:
1853     /* The option is handled directly by MatSetOption() */
1854     break;
1855   default:
1856     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1857   }
1858   PetscFunctionReturn(0);
1859 }
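
/*
   Usage sketch (hypothetical caller code; A, row, ncols, cols, vals are
   placeholders): a typical assembly-time use of the options handled by the switch
   above,

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   With MAT_IGNORE_OFF_PROC_ENTRIES set (recorded in a->donotstash above), entries
   destined for other ranks are dropped instead of stashed and communicated.
*/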
1860 
1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1862 {
1863   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1864   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865   PetscErrorCode ierr;
1866   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1867   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1868   PetscInt       *cmap,*idx_p;
1869 
1870   PetscFunctionBegin;
1871   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1872   mat->getrowactive = PETSC_TRUE;
1873 
1874   if (!mat->rowvalues && (idx || v)) {
1875     /*
1876         allocate enough space to hold information from the longest row.
1877     */
1878     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1879     PetscInt   max = 1,tmp;
1880     for (i=0; i<matin->rmap->n; i++) {
1881       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1882       if (max < tmp) max = tmp;
1883     }
1884     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1885   }
1886 
1887   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1888   lrow = row - rstart;
1889 
1890   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1891   if (!v)   {pvA = 0; pvB = 0;}
1892   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1893   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1894   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1895   nztot = nzA + nzB;
1896 
1897   cmap = mat->garray;
1898   if (v  || idx) {
1899     if (nztot) {
1900       /* Sort by increasing column numbers, assuming A and B already sorted */
1901       PetscInt imark = -1;
1902       if (v) {
1903         *v = v_p = mat->rowvalues;
1904         for (i=0; i<nzB; i++) {
1905           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1906           else break;
1907         }
1908         imark = i;
1909         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1910         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1911       }
1912       if (idx) {
1913         *idx = idx_p = mat->rowindices;
1914         if (imark > -1) {
1915           for (i=0; i<imark; i++) {
1916             idx_p[i] = cmap[cworkB[i]];
1917           }
1918         } else {
1919           for (i=0; i<nzB; i++) {
1920             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1921             else break;
1922           }
1923           imark = i;
1924         }
1925         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1926         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1927       }
1928     } else {
1929       if (idx) *idx = 0;
1930       if (v)   *v   = 0;
1931     }
1932   }
1933   *nz  = nztot;
1934   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1935   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1940 {
1941   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1942 
1943   PetscFunctionBegin;
1944   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1945   aij->getrowactive = PETSC_FALSE;
1946   PetscFunctionReturn(0);
1947 }
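
/*
   Usage sketch (hypothetical caller code; A is a placeholder): the getrow/restorerow
   pair above serves loops over the locally owned rows only, and each MatGetRow()
   must be matched by MatRestoreRow() before the next row is requested,

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect ncols, cols[], vals[] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/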
1948 
1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1950 {
1951   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1952   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1953   PetscErrorCode ierr;
1954   PetscInt       i,j,cstart = mat->cmap->rstart;
1955   PetscReal      sum = 0.0;
1956   MatScalar      *v;
1957 
1958   PetscFunctionBegin;
1959   if (aij->size == 1) {
1960     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1961   } else {
1962     if (type == NORM_FROBENIUS) {
1963       v = amat->a;
1964       for (i=0; i<amat->nz; i++) {
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966       }
1967       v = bmat->a;
1968       for (i=0; i<bmat->nz; i++) {
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970       }
1971       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1974     } else if (type == NORM_1) { /* max column norm */
1975       PetscReal *tmp,*tmp2;
1976       PetscInt  *jj,*garray = aij->garray;
1977       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1978       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v     = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else if (type == NORM_INFINITY) { /* max row norm */
1996       PetscReal ntemp = 0.0;
1997       for (j=0; j<aij->A->rmap->n; j++) {
1998         v   = amat->a + amat->i[j];
1999         sum = 0.0;
2000         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         v = bmat->a + bmat->i[j];
2004         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2005           sum += PetscAbsScalar(*v); v++;
2006         }
2007         if (sum > ntemp) ntemp = sum;
2008       }
2009       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2010       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2011     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2012   }
2013   PetscFunctionReturn(0);
2014 }
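
/*
   The three supported norms computed above, in conventional notation:

     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)   (partial sums reduced with MPIU_SUM)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|      (largest column sum)
     NORM_INFINITY:   ||A||_oo  = max_i sum_j |a_ij|      (largest row sum)

   Usage sketch (hypothetical caller code; A is a placeholder):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/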
2015 
2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2017 {
2018   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2019   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2020   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2021   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2022   PetscErrorCode  ierr;
2023   Mat             B,A_diag,*B_diag;
2024   const MatScalar *array;
2025 
2026   PetscFunctionBegin;
2027   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2028   ai = Aloc->i; aj = Aloc->j;
2029   bi = Bloc->i; bj = Bloc->j;
2030   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2031     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2032     PetscSFNode          *oloc;
2033     PETSC_UNUSED PetscSF sf;
2034 
2035     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2036     /* compute d_nnz for preallocation */
2037     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2038     for (i=0; i<ai[ma]; i++) {
2039       d_nnz[aj[i]]++;
2040     }
2041     /* compute local off-diagonal contributions */
2042     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2043     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2044     /* map those to global */
2045     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2046     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2047     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2048     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2049     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2050     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2051     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2052 
2053     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2054     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2055     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2056     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2057     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2058     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2059   } else {
2060     B    = *matout;
2061     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2062   }
2063 
2064   b           = (Mat_MPIAIJ*)B->data;
2065   A_diag      = a->A;
2066   B_diag      = &b->A;
2067   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2068   A_diag_ncol = A_diag->cmap->N;
2069   B_diag_ilen = sub_B_diag->ilen;
2070   B_diag_i    = sub_B_diag->i;
2071 
2072   /* Set ilen for diagonal of B */
2073   for (i=0; i<A_diag_ncol; i++) {
2074     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2075   }
2076 
2077   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2078      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2079   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2080 
2081   /* copy over the B part */
2082   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2083   array = Bloc->a;
2084   row   = A->rmap->rstart;
2085   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2086   cols_tmp = cols;
2087   for (i=0; i<mb; i++) {
2088     ncol = bi[i+1]-bi[i];
2089     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2090     row++;
2091     array += ncol; cols_tmp += ncol;
2092   }
2093   ierr = PetscFree(cols);CHKERRQ(ierr);
2094 
2095   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2098     *matout = B;
2099   } else {
2100     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2106 {
2107   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2108   Mat            a    = aij->A,b = aij->B;
2109   PetscErrorCode ierr;
2110   PetscInt       s1,s2,s3;
2111 
2112   PetscFunctionBegin;
2113   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2114   if (rr) {
2115     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2116     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2117     /* Overlap communication with computation. */
2118     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2119   }
2120   if (ll) {
2121     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2122     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2123     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2124   }
2125   /* scale the diagonal block */
2126   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2127 
2128   if (rr) {
2129     /* Do a scatter end and then right scale the off-diagonal block */
2130     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2131     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2132   }
2133   PetscFunctionReturn(0);
2134 }
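
/*
   Usage sketch (hypothetical caller code; A, l, r are placeholders): the routine
   above computes A = diag(ll)*A*diag(rr), so ll must be distributed like the rows
   of A and rr like its columns,

     Vec l,r;

     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);   first output matches columns, second matches rows
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/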
2135 
2136 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2137 {
2138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2139   PetscErrorCode ierr;
2140 
2141   PetscFunctionBegin;
2142   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2147 {
2148   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2149   Mat            a,b,c,d;
2150   PetscBool      flg;
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   a = matA->A; b = matA->B;
2155   c = matB->A; d = matB->B;
2156 
2157   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2158   if (flg) {
2159     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2160   }
2161   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2166 {
2167   PetscErrorCode ierr;
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2170 
2171   PetscFunctionBegin;
2172   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2173   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2174     /* because of the column compression in the off-processor part of the matrix a->B,
2175        the number of columns in a->B and b->B may be different, hence we cannot call
2176        the MatCopy() directly on the two parts. If need be, we can provide a more
2177        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2178        then copying the submatrices */
2179     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2180   } else {
2181     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2182     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2183   }
2184   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2189 {
2190   PetscErrorCode ierr;
2191 
2192   PetscFunctionBegin;
2193   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 /*
2198    Computes the number of nonzeros per row needed for preallocation when X and Y
2199    have different nonzero structure.
2200 */
2201 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2202 {
2203   PetscInt       i,j,k,nzx,nzy;
2204 
2205   PetscFunctionBegin;
2206   /* Set the number of nonzeros in the new matrix */
2207   for (i=0; i<m; i++) {
2208     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2209     nzx = xi[i+1] - xi[i];
2210     nzy = yi[i+1] - yi[i];
2211     nnz[i] = 0;
2212     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2213       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2214       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2215       nnz[i]++;
2216     }
2217     for (; k<nzy; k++) nnz[i]++;
2218   }
2219   PetscFunctionReturn(0);
2220 }
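
/*
   Worked example of the merge count above: if a row of X has global columns
   {0, 3, 7} and the same row of Y has {3, 5}, the loop counts 0 and 3 from X
   (skipping Y's duplicate 3), catches up with Y's 5, and finishes with 7, giving
   nnz = 4 for the merged pattern {0, 3, 5, 7}.
*/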
2221 
2222 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2223 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2224 {
2225   PetscErrorCode ierr;
2226   PetscInt       m = Y->rmap->N;
2227   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2228   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2229 
2230   PetscFunctionBegin;
2231   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2239   PetscBLASInt   bnz,one=1;
2240   Mat_SeqAIJ     *x,*y;
2241 
2242   PetscFunctionBegin;
2243   if (str == SAME_NONZERO_PATTERN) {
2244     PetscScalar alpha = a;
2245     x    = (Mat_SeqAIJ*)xx->A->data;
2246     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2247     y    = (Mat_SeqAIJ*)yy->A->data;
2248     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2249     x    = (Mat_SeqAIJ*)xx->B->data;
2250     y    = (Mat_SeqAIJ*)yy->B->data;
2251     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2252     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2253     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2254     /* the MatAXPY_Basic* subroutines call MatAssembly, so the matrix on the GPU
2255        will be updated */
2256 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2257     if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
2258       Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
2259     }
2260 #endif
2261   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2262     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2263   } else {
2264     Mat      B;
2265     PetscInt *nnz_d,*nnz_o;
2266     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2267     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2268     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2269     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2270     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2271     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2272     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2273     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2274     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2275     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2276     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2277     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2278     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2279     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2280   }
2281   PetscFunctionReturn(0);
2282 }
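
/*
   Usage sketch (hypothetical caller code; X, Y are placeholders): Y = a*X + Y,
   where the structure flag selects the branch above,

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the update reduces to two BLAS axpy calls on the
   stored values; with DIFFERENT_NONZERO_PATTERN a matrix with the merged pattern
   is preallocated and Y is replaced through MatHeaderReplace().
*/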
2283 
2284 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2285 
2286 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2287 {
2288 #if defined(PETSC_USE_COMPLEX)
2289   PetscErrorCode ierr;
2290   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2291 
2292   PetscFunctionBegin;
2293   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2294   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2295 #else
2296   PetscFunctionBegin;
2297 #endif
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2302 {
2303   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2304   PetscErrorCode ierr;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2308   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2313 {
2314   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2319   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,*idxb = 0;
2328   PetscScalar    *va,*vb;
2329   Vec            vtmp;
2330 
2331   PetscFunctionBegin;
2332   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2333   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2334   if (idx) {
2335     for (i=0; i<A->rmap->n; i++) {
2336       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2337     }
2338   }
2339 
2340   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2341   if (idx) {
2342     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2343   }
2344   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2345   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2346 
2347   for (i=0; i<A->rmap->n; i++) {
2348     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2349       va[i] = vb[i];
2350       if (idx) idx[i] = a->garray[idxb[i]];
2351     }
2352   }
2353 
2354   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2355   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2356   ierr = PetscFree(idxb);CHKERRQ(ierr);
2357   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2362 {
2363   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2364   PetscErrorCode ierr;
2365   PetscInt       i,*idxb = 0;
2366   PetscScalar    *va,*vb;
2367   Vec            vtmp;
2368 
2369   PetscFunctionBegin;
2370   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2371   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2372   if (idx) {
2373     for (i=0; i<A->rmap->n; i++) {
2374       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2375     }
2376   }
2377 
2378   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2379   if (idx) {
2380     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2381   }
2382   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2383   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2384 
2385   for (i=0; i<A->rmap->n; i++) {
2386     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2387       va[i] = vb[i];
2388       if (idx) idx[i] = a->garray[idxb[i]];
2389     }
2390   }
2391 
2392   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2393   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2394   ierr = PetscFree(idxb);CHKERRQ(ierr);
2395   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2400 {
2401   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2402   PetscInt       n      = A->rmap->n;
2403   PetscInt       cstart = A->cmap->rstart;
2404   PetscInt       *cmap  = mat->garray;
2405   PetscInt       *diagIdx, *offdiagIdx;
2406   Vec            diagV, offdiagV;
2407   PetscScalar    *a, *diagA, *offdiagA;
2408   PetscInt       r;
2409   PetscErrorCode ierr;
2410 
2411   PetscFunctionBegin;
2412   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2413   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2414   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2415   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2416   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2417   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2418   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2419   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2420   for (r = 0; r < n; ++r) {
2421     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2422       a[r]   = diagA[r];
2423       idx[r] = cstart + diagIdx[r];
2424     } else {
2425       a[r]   = offdiagA[r];
2426       idx[r] = cmap[offdiagIdx[r]];
2427     }
2428   }
2429   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2430   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2431   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2432   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2433   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2434   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2439 {
2440   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2441   PetscInt       n      = A->rmap->n;
2442   PetscInt       cstart = A->cmap->rstart;
2443   PetscInt       *cmap  = mat->garray;
2444   PetscInt       *diagIdx, *offdiagIdx;
2445   Vec            diagV, offdiagV;
2446   PetscScalar    *a, *diagA, *offdiagA;
2447   PetscInt       r;
2448   PetscErrorCode ierr;
2449 
2450   PetscFunctionBegin;
2451   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2452   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2453   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2454   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2455   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2456   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2457   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2458   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2459   for (r = 0; r < n; ++r) {
2460     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2461       a[r]   = diagA[r];
2462       idx[r] = cstart + diagIdx[r];
2463     } else {
2464       a[r]   = offdiagA[r];
2465       idx[r] = cmap[offdiagIdx[r]];
2466     }
2467   }
2468   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2469   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2470   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2471   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2472   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2473   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2478 {
2479   PetscErrorCode ierr;
2480   Mat            *dummy;
2481 
2482   PetscFunctionBegin;
2483   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2484   *newmat = *dummy;
2485   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2490 {
2491   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2492   PetscErrorCode ierr;
2493 
2494   PetscFunctionBegin;
2495   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2496   A->factorerrortype = a->A->factorerrortype;
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2501 {
2502   PetscErrorCode ierr;
2503   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2504 
2505   PetscFunctionBegin;
2506   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2507   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2508   if (x->assembled) {
2509     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2510   } else {
2511     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2512   }
2513   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2514   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2519 {
2520   PetscFunctionBegin;
2521   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2522   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 /*@
2527    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2528 
2529    Collective on Mat
2530 
2531    Input Parameters:
2532 +    A - the matrix
2533 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is the non-scalable algorithm)
2534 
2535    Level: advanced
2536 
2537 @*/
2538 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2539 {
2540   PetscErrorCode       ierr;
2541 
2542   PetscFunctionBegin;
2543   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
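
/*
   Example usage (a minimal sketch; A is illustrative): selecting the scalable overlap
   algorithm on an existing MPIAIJ matrix, either directly

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or, equivalently, via the option registered just below:

     -mat_increase_overlap_scalable
*/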
2546 
2547 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2548 {
2549   PetscErrorCode       ierr;
2550   PetscBool            sc = PETSC_FALSE,flg;
2551 
2552   PetscFunctionBegin;
2553   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2554   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2555   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2556   if (flg) {
2557     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2558   }
2559   ierr = PetscOptionsTail();CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2564 {
2565   PetscErrorCode ierr;
2566   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2567   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2568 
2569   PetscFunctionBegin;
2570   if (!Y->preallocated) {
2571     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2572   } else if (!aij->nz) {
2573     PetscInt nonew = aij->nonew;
2574     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2575     aij->nonew = nonew;
2576   }
2577   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
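
/*
   Example usage (a minimal sketch; Y and alpha are illustrative): MatShift() computes
   Y = Y + alpha*I, and the code above makes it safe even on an unpreallocated or
   empty MPIAIJ matrix by first preallocating one diagonal entry per row.

     PetscScalar alpha = 2.0;
     ierr = MatShift(Y,alpha);CHKERRQ(ierr);
*/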
2580 
2581 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2582 {
2583   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2584   PetscErrorCode ierr;
2585 
2586   PetscFunctionBegin;
2587   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2588   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2589   if (d) {
2590     PetscInt rstart;
2591     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2592     *d += rstart;
2593 
2594   }
2595   PetscFunctionReturn(0);
2596 }
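
/*
   Example usage (a minimal sketch; A is illustrative): querying the locally owned
   block for a missing diagonal entry; d comes back in global numbering because the
   row ownership offset is added above.

     PetscBool missing;
     PetscInt  d;
     ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
     if (missing) { ierr = PetscPrintf(PETSC_COMM_SELF,"first missing diagonal: global row %D\n",d);CHKERRQ(ierr); }
*/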
2597 
2598 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2599 {
2600   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2601   PetscErrorCode ierr;
2602 
2603   PetscFunctionBegin;
2604   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2605   PetscFunctionReturn(0);
2606 }
2607 
2608 /* -------------------------------------------------------------------*/
2609 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2610                                        MatGetRow_MPIAIJ,
2611                                        MatRestoreRow_MPIAIJ,
2612                                        MatMult_MPIAIJ,
2613                                 /* 4*/ MatMultAdd_MPIAIJ,
2614                                        MatMultTranspose_MPIAIJ,
2615                                        MatMultTransposeAdd_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*10*/ 0,
2620                                        0,
2621                                        0,
2622                                        MatSOR_MPIAIJ,
2623                                        MatTranspose_MPIAIJ,
2624                                 /*15*/ MatGetInfo_MPIAIJ,
2625                                        MatEqual_MPIAIJ,
2626                                        MatGetDiagonal_MPIAIJ,
2627                                        MatDiagonalScale_MPIAIJ,
2628                                        MatNorm_MPIAIJ,
2629                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2630                                        MatAssemblyEnd_MPIAIJ,
2631                                        MatSetOption_MPIAIJ,
2632                                        MatZeroEntries_MPIAIJ,
2633                                 /*24*/ MatZeroRows_MPIAIJ,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*29*/ MatSetUp_MPIAIJ,
2639                                        0,
2640                                        0,
2641                                        MatGetDiagonalBlock_MPIAIJ,
2642                                        0,
2643                                 /*34*/ MatDuplicate_MPIAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*39*/ MatAXPY_MPIAIJ,
2649                                        MatCreateSubMatrices_MPIAIJ,
2650                                        MatIncreaseOverlap_MPIAIJ,
2651                                        MatGetValues_MPIAIJ,
2652                                        MatCopy_MPIAIJ,
2653                                 /*44*/ MatGetRowMax_MPIAIJ,
2654                                        MatScale_MPIAIJ,
2655                                        MatShift_MPIAIJ,
2656                                        MatDiagonalSet_MPIAIJ,
2657                                        MatZeroRowsColumns_MPIAIJ,
2658                                 /*49*/ MatSetRandom_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2664                                        0,
2665                                        MatSetUnfactored_MPIAIJ,
2666                                        MatPermute_MPIAIJ,
2667                                        0,
2668                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2669                                        MatDestroy_MPIAIJ,
2670                                        MatView_MPIAIJ,
2671                                        0,
2672                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2673                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2674                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2679                                        MatGetRowMinAbs_MPIAIJ,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                 /*75*/ MatFDColoringApply_AIJ,
2685                                        MatSetFromOptions_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                        MatFindZeroDiagonals_MPIAIJ,
2689                                 /*80*/ 0,
2690                                        0,
2691                                        0,
2692                                 /*83*/ MatLoad_MPIAIJ,
2693                                        MatIsSymmetric_MPIAIJ,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2699                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2700                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2701                                        MatPtAP_MPIAIJ_MPIAIJ,
2702                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2703                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                 /*99*/ 0,
2709                                        0,
2710                                        0,
2711                                        MatConjugate_MPIAIJ,
2712                                        0,
2713                                 /*104*/MatSetValuesRow_MPIAIJ,
2714                                        MatRealPart_MPIAIJ,
2715                                        MatImaginaryPart_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                 /*109*/0,
2719                                        0,
2720                                        MatGetRowMin_MPIAIJ,
2721                                        0,
2722                                        MatMissingDiagonal_MPIAIJ,
2723                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2724                                        0,
2725                                        MatGetGhosts_MPIAIJ,
2726                                        0,
2727                                        0,
2728                                 /*119*/0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        MatGetMultiProcBlock_MPIAIJ,
2733                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2734                                        MatGetColumnNorms_MPIAIJ,
2735                                        MatInvertBlockDiagonal_MPIAIJ,
2736                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2737                                        MatCreateSubMatricesMPI_MPIAIJ,
2738                                 /*129*/0,
2739                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2740                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2741                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2742                                        0,
2743                                 /*134*/0,
2744                                        0,
2745                                        MatRARt_MPIAIJ_MPIAIJ,
2746                                        0,
2747                                        0,
2748                                 /*139*/MatSetBlockSizes_MPIAIJ,
2749                                        0,
2750                                        0,
2751                                        MatFDColoringSetUp_MPIXAIJ,
2752                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2753                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2754 };
2755 
2756 /* ----------------------------------------------------------------------------------------*/
2757 
2758 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2759 {
2760   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2765   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2770 {
2771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2772   PetscErrorCode ierr;
2773 
2774   PetscFunctionBegin;
2775   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2776   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
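
/*
   Example usage (a minimal sketch; A is illustrative): the store/retrieve pair stashes
   and later restores the numerical values of a matrix whose nonzero pattern is fixed;
   it requires new nonzero locations to be disabled first.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     (... modify values of A and reassemble ...)
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/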
2779 
2780 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2781 {
2782   Mat_MPIAIJ     *b;
2783   PetscErrorCode ierr;
2784   PetscMPIInt    size;
2785 
2786   PetscFunctionBegin;
2787   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2788   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2789   b = (Mat_MPIAIJ*)B->data;
2790 
2791 #if defined(PETSC_USE_CTABLE)
2792   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2793 #else
2794   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2795 #endif
2796   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2797   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2798   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2799 
2800   /* Because B will have been resized we simply destroy it and create a new one each time */
2801   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2802   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2803   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2804   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2805   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2806   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2807   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2808 
2809   if (!B->preallocated) {
2810     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2811     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2812     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2813     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2814     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2815   }
2816 
2817   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2818   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2819   B->preallocated  = PETSC_TRUE;
2820   B->was_assembled = PETSC_FALSE;
2821   B->assembled     = PETSC_FALSE;
2822   PetscFunctionReturn(0);
2823 }
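
/*
   Example usage (a minimal sketch; A and the global size N are illustrative):
   preallocating a distributed matrix with at most 3 nonzeros per row in the diagonal
   block and 1 in the off-diagonal block; exact per-row counts could be supplied
   through the d_nnz/o_nnz arrays instead of the scalar d_nz/o_nz.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,1,NULL);CHKERRQ(ierr);
*/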
2824 
2825 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2826 {
2827   Mat_MPIAIJ     *b;
2828   PetscErrorCode ierr;
2829 
2830   PetscFunctionBegin;
2831   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2832   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2834   b = (Mat_MPIAIJ*)B->data;
2835 
2836 #if defined(PETSC_USE_CTABLE)
2837   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2838 #else
2839   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2840 #endif
2841   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2842   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2843   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2844 
2845   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2846   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2847   B->preallocated  = PETSC_TRUE;
2848   B->was_assembled = PETSC_FALSE;
2849   B->assembled = PETSC_FALSE;
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2854 {
2855   Mat            mat;
2856   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2857   PetscErrorCode ierr;
2858 
2859   PetscFunctionBegin;
2860   *newmat = 0;
2861   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2862   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2863   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2864   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2865   a       = (Mat_MPIAIJ*)mat->data;
2866 
2867   mat->factortype   = matin->factortype;
2868   mat->assembled    = PETSC_TRUE;
2869   mat->insertmode   = NOT_SET_VALUES;
2870   mat->preallocated = PETSC_TRUE;
2871 
2872   a->size         = oldmat->size;
2873   a->rank         = oldmat->rank;
2874   a->donotstash   = oldmat->donotstash;
2875   a->roworiented  = oldmat->roworiented;
2876   a->rowindices   = 0;
2877   a->rowvalues    = 0;
2878   a->getrowactive = PETSC_FALSE;
2879 
2880   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2881   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2882 
2883   if (oldmat->colmap) {
2884 #if defined(PETSC_USE_CTABLE)
2885     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2886 #else
2887     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2888     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2889     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2890 #endif
2891   } else a->colmap = 0;
2892   if (oldmat->garray) {
2893     PetscInt len;
2894     len  = oldmat->B->cmap->n;
2895     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2896     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2897     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2898   } else a->garray = 0;
2899 
2900   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2901   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2902   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2903   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2904 
2905   if (oldmat->Mvctx_mpi1) {
2906     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2907     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2908   }
2909 
2910   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2911   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2912   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2913   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2914   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2915   *newmat = mat;
2916   PetscFunctionReturn(0);
2917 }
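
/*
   Example usage (a minimal sketch; A is illustrative): duplicating an assembled MPIAIJ
   matrix into B; MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN may be passed
   instead when only the nonzero structure is needed.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/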
2918 
2919 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2920 {
2921   PetscBool      isbinary, ishdf5;
2922   PetscErrorCode ierr;
2923 
2924   PetscFunctionBegin;
2925   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2926   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2927   /* force binary viewer to load .info file if it has not yet done so */
2928   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2929   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2930   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2931   if (isbinary) {
2932     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2933   } else if (ishdf5) {
2934 #if defined(PETSC_HAVE_HDF5)
2935     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2936 #else
2937     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2938 #endif
2939   } else {
2940     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2941   }
2942   PetscFunctionReturn(0);
2943 }
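
/*
   Example usage (a minimal sketch; the file name matrix.dat is illustrative): loading
   an MPIAIJ matrix from a PETSc binary file via the binary branch above.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/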
2944 
2945 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2946 {
2947   PetscScalar    *vals,*svals;
2948   MPI_Comm       comm;
2949   PetscErrorCode ierr;
2950   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2951   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2952   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2953   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2954   PetscInt       cend,cstart,n,*rowners;
2955   int            fd;
2956   PetscInt       bs = newMat->rmap->bs;
2957 
2958   PetscFunctionBegin;
2959   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2960   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2961   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2962   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2963   if (!rank) {
2964     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2965     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2966     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2967   }
2968 
2969   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2970   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2971   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2972   if (bs < 0) bs = 1;
2973 
2974   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2975   M    = header[1]; N = header[2];
2976 
2977   /* If global sizes are set, check if they are consistent with that given in the file */
2978   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2979   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2980 
2981   /* determine ownership of all (block) rows */
2982   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2983   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2984   else m = newMat->rmap->n; /* Set by user */
2985 
2986   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2987   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2988 
2989   /* First process needs enough room for the process with the most rows */
2990   if (!rank) {
2991     mmax = rowners[1];
2992     for (i=2; i<=size; i++) {
2993       mmax = PetscMax(mmax, rowners[i]);
2994     }
2995   } else mmax = -1;             /* unused, but compilers complain */
2996 
2997   rowners[0] = 0;
2998   for (i=2; i<=size; i++) {
2999     rowners[i] += rowners[i-1];
3000   }
3001   rstart = rowners[rank];
3002   rend   = rowners[rank+1];
3003 
3004   /* distribute row lengths to all processors */
3005   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3006   if (!rank) {
3007     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3008     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3009     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3010     for (j=0; j<m; j++) {
3011       procsnz[0] += ourlens[j];
3012     }
3013     for (i=1; i<size; i++) {
3014       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3015       /* calculate the number of nonzeros on each processor */
3016       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3017         procsnz[i] += rowlengths[j];
3018       }
3019       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3020     }
3021     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3022   } else {
3023     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3024   }
3025 
3026   if (!rank) {
3027     /* determine max buffer needed and allocate it */
3028     maxnz = 0;
3029     for (i=0; i<size; i++) {
3030       maxnz = PetscMax(maxnz,procsnz[i]);
3031     }
3032     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3033 
3034     /* read in my part of the matrix column indices  */
3035     nz   = procsnz[0];
3036     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3037     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3038 
3039     /* read in everyone else's parts and ship them off */
3040     for (i=1; i<size; i++) {
3041       nz   = procsnz[i];
3042       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3043       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3044     }
3045     ierr = PetscFree(cols);CHKERRQ(ierr);
3046   } else {
3047     /* determine buffer space needed for message */
3048     nz = 0;
3049     for (i=0; i<m; i++) {
3050       nz += ourlens[i];
3051     }
3052     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3053 
3054     /* receive message of column indices */
3055     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3056   }
3057 
3058   /* determine column ownership if matrix is not square */
3059   if (N != M) {
3060     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3061     else n = newMat->cmap->n;
3062     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3063     cstart = cend - n;
3064   } else {
3065     cstart = rstart;
3066     cend   = rend;
3067     n      = cend - cstart;
3068   }
3069 
3070   /* loop over local rows, determining number of off-diagonal entries */
3071   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3072   jj   = 0;
3073   for (i=0; i<m; i++) {
3074     for (j=0; j<ourlens[i]; j++) {
3075       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3076       jj++;
3077     }
3078   }
3079 
3080   for (i=0; i<m; i++) {
3081     ourlens[i] -= offlens[i];
3082   }
3083   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3084 
3085   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3086 
3087   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3088 
3089   for (i=0; i<m; i++) {
3090     ourlens[i] += offlens[i];
3091   }
3092 
3093   if (!rank) {
3094     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3095 
3096     /* read in my part of the matrix numerical values  */
3097     nz   = procsnz[0];
3098     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3099 
3100     /* insert into matrix */
3101     jj      = rstart;
3102     smycols = mycols;
3103     svals   = vals;
3104     for (i=0; i<m; i++) {
3105       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3106       smycols += ourlens[i];
3107       svals   += ourlens[i];
3108       jj++;
3109     }
3110 
3111     /* read in other processors' parts and ship them out */
3112     for (i=1; i<size; i++) {
3113       nz   = procsnz[i];
3114       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3115       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3116     }
3117     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3118   } else {
3119     /* receive numeric values */
3120     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3121 
3122     /* receive message of values */
3123     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3124 
3125     /* insert into matrix */
3126     jj      = rstart;
3127     smycols = mycols;
3128     svals   = vals;
3129     for (i=0; i<m; i++) {
3130       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3131       smycols += ourlens[i];
3132       svals   += ourlens[i];
3133       jj++;
3134     }
3135   }
3136   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3137   ierr = PetscFree(vals);CHKERRQ(ierr);
3138   ierr = PetscFree(mycols);CHKERRQ(ierr);
3139   ierr = PetscFree(rowners);CHKERRQ(ierr);
3140   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   PetscFunctionReturn(0);
3143 }
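
/*
   For reference, the binary layout consumed above is, in order: a 4-entry header
   {MAT_FILE_CLASSID, M, N, nz} (a negative header[3] flags a special on-disk format
   that this loader rejects), then the M row lengths, then all column indices, and
   finally all numerical values; rank 0 reads each piece and ships the other
   processes' shares with MPIULong_Send().
*/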
3144 
3145 /* Not scalable because of ISAllGather() unless getting all columns. */
3146 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3147 {
3148   PetscErrorCode ierr;
3149   IS             iscol_local;
3150   PetscBool      isstride;
3151   PetscMPIInt    lisstride=0,gisstride;
3152 
3153   PetscFunctionBegin;
3154   /* check if we are grabbing all columns */
3155   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3156 
3157   if (isstride) {
3158     PetscInt  start,len,mstart,mlen;
3159     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3160     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3161     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3162     if (mstart == start && mlen-mstart == len) lisstride = 1;
3163   }
3164 
3165   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3166   if (gisstride) {
3167     PetscInt N;
3168     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3169     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3170     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3171     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3172   } else {
3173     PetscInt cbs;
3174     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3175     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3176     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3177   }
3178 
3179   *isseq = iscol_local;
3180   PetscFunctionReturn(0);
3181 }
3182 
3183 /*
3184  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3185  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3186 
3187  Input Parameters:
3188    mat - matrix
3189    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3190            i.e., mat->rstart <= isrow[i] < mat->rend
3191    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3192            i.e., mat->cstart <= iscol[i] < mat->cend
3193  Output Parameters:
3194    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3195    iscol_o - sequential column index set for retrieving mat->B
3196    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3197  */
3198 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3199 {
3200   PetscErrorCode ierr;
3201   Vec            x,cmap;
3202   const PetscInt *is_idx;
3203   PetscScalar    *xarray,*cmaparray;
3204   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3205   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3206   Mat            B=a->B;
3207   Vec            lvec=a->lvec,lcmap;
3208   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3209   MPI_Comm       comm;
3210   VecScatter     Mvctx=a->Mvctx;
3211 
3212   PetscFunctionBegin;
3213   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3214   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3215 
3216   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3217   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3218   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3219   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3220   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3221 
3222   /* Get start indices */
3223   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3224   isstart -= ncols;
3225   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3226 
3227   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3228   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3229   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3230   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3231   for (i=0; i<ncols; i++) {
3232     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3233     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3234     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3235   }
3236   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3237   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3238   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3239 
3240   /* Get iscol_d */
3241   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3242   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3243   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3244 
3245   /* Get isrow_d */
3246   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3247   rstart = mat->rmap->rstart;
3248   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3249   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3250   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3251   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3252 
3253   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3254   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3255   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3256 
3257   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3258   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3259   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3260 
3261   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3262 
3263   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3264   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3265 
3266   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3267   /* off-process column indices */
3268   count = 0;
3269   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3270   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3271 
3272   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3273   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3274   for (i=0; i<Bn; i++) {
3275     if (PetscRealPart(xarray[i]) > -1.0) {
3276       idx[count]     = i;                   /* local column index in off-diagonal part B */
3277       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3278       count++;
3279     }
3280   }
3281   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3282   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3283 
3284   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3285   /* cannot ensure iscol_o has same blocksize as iscol! */
3286 
3287   ierr = PetscFree(idx);CHKERRQ(ierr);
3288   *garray = cmap1;
3289 
3290   ierr = VecDestroy(&x);CHKERRQ(ierr);
3291   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3292   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3293   PetscFunctionReturn(0);
3294 }
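
/*
   A small worked instance of the pad-and-scatter trick above (illustrative numbers):
   suppose this process owns columns [4,8) and iscol selects global columns {5,7},
   with isstart = 10. Then the owned range of x becomes (-1, 5, -1, 7) and that of
   cmap (-1, 10, -1, 11); after the forward scatter, any lvec entry > -1 marks a
   selected off-process column of B, and the matching lcmap entry gives that column's
   index in the submatrix.
*/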
3295 
3296 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3297 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3298 {
3299   PetscErrorCode ierr;
3300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3301   Mat            M = NULL;
3302   MPI_Comm       comm;
3303   IS             iscol_d,isrow_d,iscol_o;
3304   Mat            Asub = NULL,Bsub = NULL;
3305   PetscInt       n;
3306 
3307   PetscFunctionBegin;
3308   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3309 
3310   if (call == MAT_REUSE_MATRIX) {
3311     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3312     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3313     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3314 
3315     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3316     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3317 
3318     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3319     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3320 
3321     /* Update diagonal and off-diagonal portions of submat */
3322     asub = (Mat_MPIAIJ*)(*submat)->data;
3323     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3324     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3325     if (n) {
3326       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3327     }
3328     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3330 
3331   } else { /* call == MAT_INITIAL_MATRIX */
3332     const PetscInt *garray;
3333     PetscInt        BsubN;
3334 
3335     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3336     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3337 
3338     /* Create local submatrices Asub and Bsub */
3339     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3340     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3341 
3342     /* Create submatrix M */
3343     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3344 
3345     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3346     asub = (Mat_MPIAIJ*)M->data;
3347 
3348     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3349     n = asub->B->cmap->N;
3350     if (BsubN > n) {
3351       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3352       const PetscInt *idx;
3353       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3354       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3355 
3356       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3357       j = 0;
3358       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3359       for (i=0; i<n; i++) {
3360         if (j >= BsubN) break;
3361         while (subgarray[i] > garray[j]) j++;
3362 
3363         if (subgarray[i] == garray[j]) {
3364           idx_new[i] = idx[j++];
3365         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3366       }
3367       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3368 
3369       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3370       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3371 
3372     } else if (BsubN < n) {
3373       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3374     }
3375 
3376     ierr = PetscFree(garray);CHKERRQ(ierr);
3377     *submat = M;
3378 
3379     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3380     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3381     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3382 
3383     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3384     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3385 
3386     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3387     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3388   }
3389   PetscFunctionReturn(0);
3390 }
3391 
3392 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3393 {
3394   PetscErrorCode ierr;
3395   IS             iscol_local=NULL,isrow_d;
3396   PetscInt       csize;
3397   PetscInt       n,i,j,start,end;
3398   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3399   MPI_Comm       comm;
3400 
3401   PetscFunctionBegin;
3402   /* If isrow has same processor distribution as mat,
3403      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3404   if (call == MAT_REUSE_MATRIX) {
3405     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3406     if (isrow_d) {
3407       sameRowDist  = PETSC_TRUE;
3408       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3409     } else {
3410       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3411       if (iscol_local) {
3412         sameRowDist  = PETSC_TRUE;
3413         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3414       }
3415     }
3416   } else {
3417     /* Check if isrow has same processor distribution as mat */
3418     sameDist[0] = PETSC_FALSE;
3419     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3420     if (!n) {
3421       sameDist[0] = PETSC_TRUE;
3422     } else {
3423       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3424       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3425       if (i >= start && j < end) {
3426         sameDist[0] = PETSC_TRUE;
3427       }
3428     }
3429 
3430     /* Check if iscol has same processor distribution as mat */
3431     sameDist[1] = PETSC_FALSE;
3432     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3433     if (!n) {
3434       sameDist[1] = PETSC_TRUE;
3435     } else {
3436       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3437       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3438       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3439     }
3440 
3441     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3442     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3443     sameRowDist = tsameDist[0];
3444   }
3445 
3446   if (sameRowDist) {
3447     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3448       /* isrow and iscol have same processor distribution as mat */
3449       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3450       PetscFunctionReturn(0);
3451     } else { /* sameRowDist */
3452       /* isrow has same processor distribution as mat */
3453       if (call == MAT_INITIAL_MATRIX) {
3454         PetscBool sorted;
3455         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3456         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3457         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3458         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3459 
3460         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3461         if (sorted) {
3462           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3463           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3464           PetscFunctionReturn(0);
3465         }
3466       } else { /* call == MAT_REUSE_MATRIX */
3467         IS    iscol_sub;
3468         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3469         if (iscol_sub) {
3470           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3471           PetscFunctionReturn(0);
3472         }
3473       }
3474     }
3475   }
3476 
3477   /* General case: iscol -> iscol_local which has global size of iscol */
3478   if (call == MAT_REUSE_MATRIX) {
3479     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3480     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3481   } else {
3482     if (!iscol_local) {
3483       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3484     }
3485   }
3486 
3487   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3488   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3489 
3490   if (call == MAT_INITIAL_MATRIX) {
3491     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3492     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3493   }
3494   PetscFunctionReturn(0);
3495 }
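
/*
   Example usage (a minimal sketch; A, isrow and iscol are illustrative): extracting a
   parallel submatrix. With MAT_INITIAL_MATRIX the routine composes the index sets onto
   the result, so a later MAT_REUSE_MATRIX call with the same sets only refreshes the
   numerical values.

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     (... the values of A change, same nonzero pattern ...)
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/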
3496 
3497 /*@C
3498      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3499          and "off-diagonal" part of the matrix in CSR format.
3500 
3501    Collective
3502 
3503    Input Parameters:
3504 +  comm - MPI communicator
3505 .  A - "diagonal" portion of matrix
3506 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3507 -  garray - global index of B columns
3508 
3509    Output Parameter:
3510 .   mat - the matrix, with input A as its local diagonal matrix

3511    Level: advanced
3512 
3513    Notes:
3514        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3515        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3516 
3517 .seealso: MatCreateMPIAIJWithSplitArrays()
3518 @*/
3519 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3520 {
3521   PetscErrorCode ierr;
3522   Mat_MPIAIJ     *maij;
3523   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3524   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3525   PetscScalar    *oa=b->a;
3526   Mat            Bnew;
3527   PetscInt       m,n,N;
3528 
3529   PetscFunctionBegin;
3530   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3531   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3532   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3533   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3534   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3535   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3536 
3537   /* Get global columns of mat */
3538   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3539 
3540   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3541   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3542   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3543   maij = (Mat_MPIAIJ*)(*mat)->data;
3544 
3545   (*mat)->preallocated = PETSC_TRUE;
3546 
3547   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3548   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3549 
3550   /* Set A as diagonal portion of *mat */
3551   maij->A = A;
3552 
3553   nz = oi[m];
3554   for (i=0; i<nz; i++) {
3555     col   = oj[i];
3556     oj[i] = garray[col];
3557   }
3558 
3559   /* Set Bnew as off-diagonal portion of *mat */
3560   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3561   bnew        = (Mat_SeqAIJ*)Bnew->data;
3562   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3563   maij->B     = Bnew;
3564 
3565   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3566 
3567   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3568   b->free_a       = PETSC_FALSE;
3569   b->free_ij      = PETSC_FALSE;
3570   ierr = MatDestroy(&B);CHKERRQ(ierr);
3571 
3572   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3573   bnew->free_a       = PETSC_TRUE;
3574   bnew->free_ij      = PETSC_TRUE;
3575 
3576   /* condense columns of maij->B */
3577   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3578   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3579   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3580   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3581   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3582   PetscFunctionReturn(0);
3583 }
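
/*
   Example usage (a minimal sketch; Aloc, Bloc and garray are illustrative): gluing
   per-process SeqAIJ pieces into one MPIAIJ matrix. Aloc is this process's diagonal
   block, Bloc the off-diagonal block in compressed (local) column numbering, and
   garray maps Bloc's columns to global column indices; both Aloc and Bloc are
   consumed by the call, as noted above.

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
*/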
3584 
3585 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3586 
3587 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3588 {
3589   PetscErrorCode ierr;
3590   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3591   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3592   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3593   Mat            M,Msub,B=a->B;
3594   MatScalar      *aa;
3595   Mat_SeqAIJ     *aij;
3596   PetscInt       *garray = a->garray,*colsub,Ncols;
3597   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3598   IS             iscol_sub,iscmap;
3599   const PetscInt *is_idx,*cmap;
3600   PetscBool      allcolumns=PETSC_FALSE;
3601   MPI_Comm       comm;
3602 
3603   PetscFunctionBegin;
3604   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3605 
3606   if (call == MAT_REUSE_MATRIX) {
3607     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3608     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3609     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3610 
3611     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3612     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3613 
3614     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3615     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3616 
3617     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3618 
3619   } else { /* call == MAT_INITIAL_MATRIX */
3620     PetscBool flg;
3621 
3622     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3623     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3624 
3625     /* (1) iscol -> nonscalable iscol_local */
3626     /* Check for special case: each processor gets entire matrix columns */
3627     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3628     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3629     if (allcolumns) {
3630       iscol_sub = iscol_local;
3631       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3632       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3633 
3634     } else {
3635       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3636       PetscInt *idx,*cmap1,k;
3637       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3638       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3639       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3640       count = 0;
3641       k     = 0;
3642       for (i=0; i<Ncols; i++) {
3643         j = is_idx[i];
3644         if (j >= cstart && j < cend) {
3645           /* diagonal part of mat */
3646           idx[count]     = j;
3647           cmap1[count++] = i; /* column index in submat */
3648         } else if (Bn) {
3649           /* off-diagonal part of mat */
3650           if (j == garray[k]) {
3651             idx[count]     = j;
3652             cmap1[count++] = i;  /* column index in submat */
3653           } else if (j > garray[k]) {
3654             while (j > garray[k] && k < Bn-1) k++;
3655             if (j == garray[k]) {
3656               idx[count]     = j;
3657               cmap1[count++] = i; /* column index in submat */
3658             }
3659           }
3660         }
3661       }
3662       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3663 
3664       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3665       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3666       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3667 
3668       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3669     }
3670 
3671     /* (3) Create sequential Msub */
3672     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3673   }
3674 
3675   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3676   aij  = (Mat_SeqAIJ*)(Msub)->data;
3677   ii   = aij->i;
3678   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3679 
3680   /*
3681       m - number of local rows
3682       Ncols - number of columns (same on all processors)
3683       rstart - first row in new global matrix generated
3684   */
3685   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3686 
3687   if (call == MAT_INITIAL_MATRIX) {
3688     /* (4) Create parallel newmat */
3689     PetscMPIInt    rank,size;
3690     PetscInt       csize;
3691 
3692     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3693     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3694 
3695     /*
3696         Determine the number of non-zeros in the diagonal and off-diagonal
3697         portions of the matrix in order to do correct preallocation
3698     */
3699 
3700     /* first get start and end of "diagonal" columns */
3701     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3702     if (csize == PETSC_DECIDE) {
3703       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3704       if (mglobal == Ncols) { /* square matrix */
3705         nlocal = m;
3706       } else {
3707         nlocal = Ncols/size + ((Ncols % size) > rank);
3708       }
3709     } else {
3710       nlocal = csize;
3711     }
3712     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3713     rstart = rend - nlocal;
3714     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3715 
3716     /* next, compute all the lengths */
3717     jj    = aij->j;
3718     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3719     olens = dlens + m;
3720     for (i=0; i<m; i++) {
3721       jend = ii[i+1] - ii[i];
3722       olen = 0;
3723       dlen = 0;
3724       for (j=0; j<jend; j++) {
3725         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3726         else dlen++;
3727         jj++;
3728       }
3729       olens[i] = olen;
3730       dlens[i] = dlen;
3731     }
3732 
3733     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3734     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3735 
3736     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3737     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3738     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3739     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3740     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3741     ierr = PetscFree(dlens);CHKERRQ(ierr);
3742 
3743   } else { /* call == MAT_REUSE_MATRIX */
3744     M    = *newmat;
3745     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3746     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3747     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3748     /*
3749          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3750        rather than the slower MatSetValues().
3751     */
3752     M->was_assembled = PETSC_TRUE;
3753     M->assembled     = PETSC_FALSE;
3754   }
3755 
3756   /* (5) Set values of Msub to *newmat */
3757   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3758   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3759 
3760   jj   = aij->j;
3761   aa   = aij->a;
3762   for (i=0; i<m; i++) {
3763     row = rstart + i;
3764     nz  = ii[i+1] - ii[i];
3765     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3766     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3767     jj += nz; aa += nz;
3768   }
3769   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3770 
3771   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773 
3774   ierr = PetscFree(colsub);CHKERRQ(ierr);
3775 
3776   /* save Msub, iscol_sub and iscmap on this process for a future MAT_REUSE_MATRIX request */
3777   if (call ==  MAT_INITIAL_MATRIX) {
3778     *newmat = M;
3779     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3780     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3781 
3782     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3783     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3784 
3785     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3786     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3787 
3788     if (iscol_local) {
3789       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3790       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3791     }
3792   }
3793   PetscFunctionReturn(0);
3794 }
3795 
3796 /*
3797     Not great since it makes two copies of the submatrix: first a SeqAIJ
3798   locally, and then the end result by concatenating the local matrices.
3799   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3800 
3801   Note: This requires a sequential iscol with all indices.
3802 */
3803 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3804 {
3805   PetscErrorCode ierr;
3806   PetscMPIInt    rank,size;
3807   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3808   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3809   Mat            M,Mreuse;
3810   MatScalar      *aa,*vwork;
3811   MPI_Comm       comm;
3812   Mat_SeqAIJ     *aij;
3813   PetscBool      colflag,allcolumns=PETSC_FALSE;
3814 
3815   PetscFunctionBegin;
3816   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3817   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3818   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3819 
3820   /* Check for special case: each processor gets entire matrix columns */
3821   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3822   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3823   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3824 
3825   if (call ==  MAT_REUSE_MATRIX) {
3826     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3827     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3828     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3829   } else {
3830     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3831   }
3832 
3833   /*
3834       m - number of local rows
3835       n - number of columns (same on all processors)
3836       rstart - first row in new global matrix generated
3837   */
3838   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3839   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3840   if (call == MAT_INITIAL_MATRIX) {
3841     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3842     ii  = aij->i;
3843     jj  = aij->j;
3844 
3845     /*
3846         Determine the number of non-zeros in the diagonal and off-diagonal
3847         portions of the matrix in order to do correct preallocation
3848     */
3849 
3850     /* first get start and end of "diagonal" columns */
3851     if (csize == PETSC_DECIDE) {
3852       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3853       if (mglobal == n) { /* square matrix */
3854         nlocal = m;
3855       } else {
3856         nlocal = n/size + ((n % size) > rank);
3857       }
3858     } else {
3859       nlocal = csize;
3860     }
3861     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3862     rstart = rend - nlocal;
3863     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3864 
3865     /* next, compute all the lengths */
3866     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3867     olens = dlens + m;
3868     for (i=0; i<m; i++) {
3869       jend = ii[i+1] - ii[i];
3870       olen = 0;
3871       dlen = 0;
3872       for (j=0; j<jend; j++) {
3873         if (*jj < rstart || *jj >= rend) olen++;
3874         else dlen++;
3875         jj++;
3876       }
3877       olens[i] = olen;
3878       dlens[i] = dlen;
3879     }
3880     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3881     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3882     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3883     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3884     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3885     ierr = PetscFree(dlens);CHKERRQ(ierr);
3886   } else {
3887     PetscInt ml,nl;
3888 
3889     M    = *newmat;
3890     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3891     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3892     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3893     /*
3894          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3895        rather than the slower MatSetValues().
3896     */
3897     M->was_assembled = PETSC_TRUE;
3898     M->assembled     = PETSC_FALSE;
3899   }
3900   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3901   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3902   ii   = aij->i;
3903   jj   = aij->j;
3904   aa   = aij->a;
3905   for (i=0; i<m; i++) {
3906     row   = rstart + i;
3907     nz    = ii[i+1] - ii[i];
3908     cwork = jj;     jj += nz;
3909     vwork = aa;     aa += nz;
3910     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3911   }
3912 
3913   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3914   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3915   *newmat = M;
3916 
3917   /* save the submatrix on this process for a future MAT_REUSE_MATRIX request */
3918   if (call ==  MAT_INITIAL_MATRIX) {
3919     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3920     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3921   }
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3926 {
3927   PetscInt       m,cstart, cend,j,nnz,i,d;
3928   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3929   const PetscInt *JJ;
3930   PetscErrorCode ierr;
3931   PetscBool      nooffprocentries;
3932 
3933   PetscFunctionBegin;
3934   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3935 
3936   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3937   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3938   m      = B->rmap->n;
3939   cstart = B->cmap->rstart;
3940   cend   = B->cmap->rend;
3941   rstart = B->rmap->rstart;
3942 
3943   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3944 
3945 #if defined(PETSC_USE_DEBUG)
3946   for (i=0; i<m; i++) {
3947     nnz = Ii[i+1]- Ii[i];
3948     JJ  = J + Ii[i];
3949     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3950     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3951     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3952   }
3953 #endif
3954 
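  /* count, for each local row, how many column indices fall inside the diagonal
     block [cstart,cend) and how many fall outside it, to drive the preallocation below */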
3955   for (i=0; i<m; i++) {
3956     nnz     = Ii[i+1]- Ii[i];
3957     JJ      = J + Ii[i];
3958     nnz_max = PetscMax(nnz_max,nnz);
3959     d       = 0;
3960     for (j=0; j<nnz; j++) {
3961       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3962     }
3963     d_nnz[i] = d;
3964     o_nnz[i] = nnz - d;
3965   }
3966   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3967   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3968 
3969   for (i=0; i<m; i++) {
3970     ii   = i + rstart;
3971     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3972   }
3973   nooffprocentries    = B->nooffprocentries;
3974   B->nooffprocentries = PETSC_TRUE;
3975   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3977   B->nooffprocentries = nooffprocentries;
3978 
3979   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982 
3983 /*@
3984    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3985    (the default parallel PETSc format).
3986 
3987    Collective
3988 
3989    Input Parameters:
3990 +  B - the matrix
3991 .  i - the indices into j for the start of each local row (starts with zero)
3992 .  j - the column indices for each local row (starts with zero)
3993 -  v - optional values in the matrix
3994 
3995    Level: developer
3996 
3997    Notes:
3998        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3999      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4000      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4001 
4002        The i and j indices are 0-based, and the i indices are offsets into the local j array.
4003 
4004        The format used for the sparse matrix input is equivalent to a
4005     row-major ordering, i.e., for the following matrix the expected input
4006     data is as shown below:
4007 
4008 $        1 0 0
4009 $        2 0 3     P0
4010 $       -------
4011 $        4 5 6     P1
4012 $
4013 $     Process0 [P0]: rows_owned=[0,1]
4014 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4015 $        j =  {0,0,2}  [size = 3]
4016 $        v =  {1,2,3}  [size = 3]
4017 $
4018 $     Process1 [P1]: rows_owned=[2]
4019 $        i =  {0,3}    [size = nrow+1  = 1+1]
4020 $        j =  {0,1,2}  [size = 3]
4021 $        v =  {4,5,6}  [size = 3]
4022 
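       For process P0 above, a minimal call sequence might look as follows (a
    sketch only, using the i,j,v arrays listed above; error checking with
    CHKERRQ() is omitted):

$     Mat B;
$     MatCreate(comm,&B);
$     MatSetSizes(B,2,PETSC_DECIDE,3,3);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
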
4023 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4024           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4025 @*/
4026 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4027 {
4028   PetscErrorCode ierr;
4029 
4030   PetscFunctionBegin;
4031   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 /*@C
4036    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4037    (the default parallel PETSc format).  For good matrix assembly performance
4038    the user should preallocate the matrix storage by setting the parameters
4039    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4040    performance can be increased by more than a factor of 50.
4041 
4042    Collective
4043 
4044    Input Parameters:
4045 +  B - the matrix
4046 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4047            (same value is used for all local rows)
4048 .  d_nnz - array containing the number of nonzeros in the various rows of the
4049            DIAGONAL portion of the local submatrix (possibly different for each row)
4050            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4051            The size of this array is equal to the number of local rows, i.e 'm'.
4052            For matrices that will be factored, you must leave room for (and set)
4053            the diagonal entry even if it is zero.
4054 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4055            submatrix (same value is used for all local rows).
4056 -  o_nnz - array containing the number of nonzeros in the various rows of the
4057            OFF-DIAGONAL portion of the local submatrix (possibly different for
4058            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4059            structure. The size of this array is equal to the number
4060            of local rows, i.e 'm'.
4061 
4062    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4063 
4064    The AIJ format (also called the Yale sparse matrix format or
4065    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4066    storage.  The stored row and column indices begin with zero.
4067    See Users-Manual: ch_mat for details.
4068 
4069    The parallel matrix is partitioned such that the first m0 rows belong to
4070    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4071    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4072 
4073    The DIAGONAL portion of the local submatrix of a processor can be defined
4074    as the submatrix obtained by extracting the part corresponding to
4075    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4076    first row that belongs to the processor, r2 is the last row belonging to
4077    this processor, and c1-c2 is the range of indices of the local part of a
4078    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4079    common case of a square matrix, the row and column ranges are the same and
4080    the DIAGONAL part is also square. The remaining portion of the local
4081    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4082 
4083    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4084 
4085    You can call MatGetInfo() to get information on how effective the preallocation was;
4086    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4087    You can also run with the option -info and look for messages with the string
4088    malloc in them to see if additional memory allocation was needed.
4089 
4090    Example usage:
4091 
4092    Consider the following 8x8 matrix with 34 non-zero values that is
4093    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4094    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4095    as follows:
4096 
4097 .vb
4098             1  2  0  |  0  3  0  |  0  4
4099     Proc0   0  5  6  |  7  0  0  |  8  0
4100             9  0 10  | 11  0  0  | 12  0
4101     -------------------------------------
4102            13  0 14  | 15 16 17  |  0  0
4103     Proc1   0 18  0  | 19 20 21  |  0  0
4104             0  0  0  | 22 23  0  | 24  0
4105     -------------------------------------
4106     Proc2  25 26 27  |  0  0 28  | 29  0
4107            30  0  0  | 31 32 33  |  0 34
4108 .ve
4109 
4110    This can be represented as a collection of submatrices as:
4111 
4112 .vb
4113       A B C
4114       D E F
4115       G H I
4116 .ve
4117 
4118    Where the submatrices A,B,C are owned by proc0, D,E,F are
4119    owned by proc1, G,H,I are owned by proc2.
4120 
4121    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4122    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4123    The 'M','N' parameters are 8,8, and have the same values on all procs.
4124 
4125    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4126    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4127    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4128    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4129    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4130    matrix, and [DF] as another SeqAIJ matrix.
4131 
4132    When d_nz, o_nz parameters are specified, d_nz storage elements are
4133    allocated for every row of the local diagonal submatrix, and o_nz
4134    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4135    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4136    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4137    In this case, the values of d_nz,o_nz are:
4138 .vb
4139      proc0 : dnz = 2, o_nz = 2
4140      proc1 : dnz = 3, o_nz = 2
4141      proc2 : dnz = 1, o_nz = 4
4142 .ve
4143    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4144    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4145    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4146    34 values.
4147 
4148    When d_nnz, o_nnz parameters are specified, the storage is specified
4149    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4150    In the above case the values for d_nnz,o_nnz are:
4151 .vb
4152      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4153      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4154      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4155 .ve
4156    Here the space allocated is the sum of all the above values, i.e., 34, and
4157    hence the preallocation is perfect.
4158 
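   For instance, on proc0 of the example above the call could be sketched as
   follows (a sketch only, assuming the d_nnz/o_nnz values listed; error
   checking omitted):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
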
4159    Level: intermediate
4160 
4161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4162           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4163 @*/
4164 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4165 {
4166   PetscErrorCode ierr;
4167 
4168   PetscFunctionBegin;
4169   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4170   PetscValidType(B,1);
4171   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4172   PetscFunctionReturn(0);
4173 }
4174 
4175 /*@
4176      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain,
4177          in standard CSR format, the local rows.
4178 
4179    Collective
4180 
4181    Input Parameters:
4182 +  comm - MPI communicator
4183 .  m - number of local rows (Cannot be PETSC_DECIDE)
4184 .  n - This value should be the same as the local size used in creating the
4185        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4186        calculated if N is given). For square matrices n is almost always m.
4187 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4188 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4189 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4190 .   j - column indices
4191 -   a - matrix values
4192 
4193    Output Parameter:
4194 .   mat - the matrix
4195 
4196    Level: intermediate
4197 
4198    Notes:
4199        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4200      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4201      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4202 
4203        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4204 
4205        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4206 
4207        The format used for the sparse matrix input is equivalent to a
4208     row-major ordering, i.e., for the following matrix the expected input
4209     data is as shown below:
4210 
4211 $        1 0 0
4212 $        2 0 3     P0
4213 $       -------
4214 $        4 5 6     P1
4215 $
4216 $     Process0 [P0]: rows_owned=[0,1]
4217 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4218 $        j =  {0,0,2}  [size = 3]
4219 $        v =  {1,2,3}  [size = 3]
4220 $
4221 $     Process1 [P1]: rows_owned=[2]
4222 $        i =  {0,3}    [size = nrow+1  = 1+1]
4223 $        j =  {0,1,2}  [size = 3]
4224 $        v =  {4,5,6}  [size = 3]
4225 
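       For process P0 above, the (collective) creation call might be sketched
    as follows (error checking omitted):

$     const PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     const PetscScalar v[] = {1,2,3};
$     Mat               A;
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
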
4226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4227           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4228 @*/
4229 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4230 {
4231   PetscErrorCode ierr;
4232 
4233   PetscFunctionBegin;
4234   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4235   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4236   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4237   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4238   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4239   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4240   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4241   PetscFunctionReturn(0);
4242 }
4243 
4244 /*@
4245      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain,
4246          in standard CSR format, the local rows. Only the numerical values are updated; the other arrays must be identical.
4247 
4248    Collective
4249 
4250    Input Parameters:
4251 +  mat - the matrix
4252 .  m - number of local rows (Cannot be PETSC_DECIDE)
4253 .  n - This value should be the same as the local size used in creating the
4254        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4255        calculated if N is given). For square matrices n is almost always m.
4256 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4257 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4258 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4259 .  J - column indices
4260 -  v - matrix values
4261 
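   Notes:
       A typical usage sketch: create the matrix once with MatCreateMPIAIJWithArrays(),
     then refresh its numerical values in place whenever v changes while Ii and J stay fixed:

$     MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&A);
$     /* ... recompute the entries of v, leaving Ii and J unchanged ... */
$     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
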
4262    Level: intermediate
4263 
4264 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4265           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4266 @*/
4267 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4268 {
4269   PetscErrorCode ierr;
4270   PetscInt       cstart,nnz,i,j;
4271   PetscInt       *ld;
4272   PetscBool      nooffprocentries;
4273   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4274   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4275   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4276   const PetscInt *Adi = Ad->i;
4277   PetscInt       ldi,Iii,md;
4278 
4279   PetscFunctionBegin;
4280   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4281   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4282   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4283   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4284 
4285   cstart = mat->cmap->rstart;
4286   if (!Aij->ld) {
4287     /* count number of entries below block diagonal */
4288     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4289     Aij->ld = ld;
4290     for (i=0; i<m; i++) {
4291       nnz  = Ii[i+1]- Ii[i];
4292       j     = 0;
4293       while (j < nnz && J[j] < cstart) {j++;}
4294       J    += nnz;
4295       ld[i] = j;
4296     }
4297   } else {
4298     ld = Aij->ld;
4299   }
4300 
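  /* For each local row, v stores the entries in global column order: first the
     ld[i] entries whose columns precede the diagonal block, then the md diagonal
     block entries, then the remaining off-diagonal entries; copy each piece into
     the ao (off-diagonal) and ad (diagonal) storage accordingly. */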
4301   for (i=0; i<m; i++) {
4302     nnz  = Ii[i+1]- Ii[i];
4303     Iii  = Ii[i];
4304     ldi  = ld[i];
4305     md   = Adi[i+1]-Adi[i];
4306     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4307     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4308     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4309     ad  += md;
4310     ao  += nnz - md;
4311   }
4312   nooffprocentries      = mat->nooffprocentries;
4313   mat->nooffprocentries = PETSC_TRUE;
4314   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4315   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4316   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4317   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4318   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4319   mat->nooffprocentries = nooffprocentries;
4320   PetscFunctionReturn(0);
4321 }
4322 
4323 /*@C
4324    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4325    (the default parallel PETSc format).  For good matrix assembly performance
4326    the user should preallocate the matrix storage by setting the parameters
4327    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4328    performance can be increased by more than a factor of 50.
4329 
4330    Collective
4331 
4332    Input Parameters:
4333 +  comm - MPI communicator
4334 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4335            This value should be the same as the local size used in creating the
4336            y vector for the matrix-vector product y = Ax.
4337 .  n - This value should be the same as the local size used in creating the
4338        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4339        calculated if N is given). For square matrices n is almost always m.
4340 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4341 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4342 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4343            (same value is used for all local rows)
4344 .  d_nnz - array containing the number of nonzeros in the various rows of the
4345            DIAGONAL portion of the local submatrix (possibly different for each row)
4346            or NULL, if d_nz is used to specify the nonzero structure.
4347            The size of this array is equal to the number of local rows, i.e 'm'.
4348 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4349            submatrix (same value is used for all local rows).
4350 -  o_nnz - array containing the number of nonzeros in the various rows of the
4351            OFF-DIAGONAL portion of the local submatrix (possibly different for
4352            each row) or NULL, if o_nz is used to specify the nonzero
4353            structure. The size of this array is equal to the number
4354            of local rows, i.e 'm'.
4355 
4356    Output Parameter:
4357 .  A - the matrix
4358 
4359    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4360    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4361    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4362 
4363    Notes:
4364    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4365 
4366    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4367    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4368    storage requirements for this matrix.
4369 
4370    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4371    processor then it must be used on all processors that share the object for
4372    that argument.
4373 
4374    The user MUST specify either the local or global matrix dimensions
4375    (possibly both).
4376 
4377    The parallel matrix is partitioned across processors such that the
4378    first m0 rows belong to process 0, the next m1 rows belong to
4379    process 1, the next m2 rows belong to process 2, etc., where
4380    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4381    values corresponding to an [m x N] submatrix.
4382 
4383    The columns are logically partitioned with the n0 columns belonging
4384    to the 0th partition, the next n1 columns belonging to the next
4385    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4386 
4387    The DIAGONAL portion of the local submatrix on any given processor
4388    is the submatrix corresponding to the rows and columns m,n owned by
4389    the given processor, i.e., the diagonal matrix on
4390    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4391    etc. The remaining portion of the local submatrix [m x (N-n)]
4392    constitutes the OFF-DIAGONAL portion. The example below better
4393    illustrates this concept.
4394 
4395    For a square global matrix we define each processor's diagonal portion
4396    to be its local rows and the corresponding columns (a square submatrix);
4397    each processor's off-diagonal portion encompasses the remainder of the
4398    local matrix (a rectangular submatrix).
4399 
4400    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4401 
4402    When calling this routine with a single process communicator, a matrix of
4403    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4404    type of communicator, use the construction mechanism
4405 .vb
4406      MatCreate(...,&A);
4407      MatSetType(A,MATMPIAIJ);
4408      MatSetSizes(A, m,n,M,N);
4409      MatMPIAIJSetPreallocation(A,...);
4410 .ve
4413 
4414    By default, this format uses inodes (identical nodes) when possible.
4415    We search for consecutive rows with the same nonzero structure, thereby
4416    reusing matrix information to achieve increased efficiency.
4417 
4418    Options Database Keys:
4419 +  -mat_no_inode  - Do not use inodes
4420 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4421 
4424    Example usage:
4425 
4426    Consider the following 8x8 matrix with 34 non-zero values that is
4427    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4428    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4429    as follows
4430 
4431 .vb
4432             1  2  0  |  0  3  0  |  0  4
4433     Proc0   0  5  6  |  7  0  0  |  8  0
4434             9  0 10  | 11  0  0  | 12  0
4435     -------------------------------------
4436            13  0 14  | 15 16 17  |  0  0
4437     Proc1   0 18  0  | 19 20 21  |  0  0
4438             0  0  0  | 22 23  0  | 24  0
4439     -------------------------------------
4440     Proc2  25 26 27  |  0  0 28  | 29  0
4441            30  0  0  | 31 32 33  |  0 34
4442 .ve
4443 
4444    This can be represented as a collection of submatrices as
4445 
4446 .vb
4447       A B C
4448       D E F
4449       G H I
4450 .ve
4451 
4452    Where the submatrices A,B,C are owned by proc0, D,E,F are
4453    owned by proc1, G,H,I are owned by proc2.
4454 
4455    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4456    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4457    The 'M','N' parameters are 8,8, and have the same values on all procs.
4458 
4459    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4460    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4461    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4462    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4463    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4464    matrix, and [DF] as another SeqAIJ matrix.
4465 
4466    When d_nz, o_nz parameters are specified, d_nz storage elements are
4467    allocated for every row of the local diagonal submatrix, and o_nz
4468    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4469    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4470    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4471    In this case, the values of d_nz,o_nz are
4472 .vb
4473      proc0 : dnz = 2, o_nz = 2
4474      proc1 : dnz = 3, o_nz = 2
4475      proc2 : dnz = 1, o_nz = 4
4476 .ve
4477    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4478    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4479    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4480    34 values.
4481 
4482    When d_nnz, o_nnz parameters are specified, the storage is specified
4483    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4484    In the above case the values for d_nnz,o_nnz are
4485 .vb
4486      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4487      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4488      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4489 .ve
4490    Here the space allocated is the sum of all the above values, i.e., 34, and
4491    hence the preallocation is perfect.
4492 
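   For instance, using the d_nz/o_nz values above, the (collective) call as
   issued on proc0 could be sketched as (error checking omitted):

.vb
     Mat A;
     MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);
.ve
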
4493    Level: intermediate
4494 
4495 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4496           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4497 @*/
4498 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4499 {
4500   PetscErrorCode ierr;
4501   PetscMPIInt    size;
4502 
4503   PetscFunctionBegin;
4504   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4505   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4506   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4507   if (size > 1) {
4508     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4509     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4510   } else {
4511     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4512     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4513   }
4514   PetscFunctionReturn(0);
4515 }
4516 
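/* Returns the diagonal block (Ad), the off-diagonal block (Ao), and the local-to-global
   column map of the off-diagonal block (colmap) of a MATMPIAIJ matrix; these are
   internal objects of A and must not be destroyed by the caller. */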
4517 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4518 {
4519   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4520   PetscBool      flg;
4521   PetscErrorCode ierr;
4522 
4523   PetscFunctionBegin;
4524   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4525   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4526   if (Ad)     *Ad     = a->A;
4527   if (Ao)     *Ao     = a->B;
4528   if (colmap) *colmap = a->garray;
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4533 {
4534   PetscErrorCode ierr;
4535   PetscInt       m,N,i,rstart,nnz,Ii;
4536   PetscInt       *indx;
4537   PetscScalar    *values;
4538 
4539   PetscFunctionBegin;
4540   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4541   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4542     PetscInt       *dnz,*onz,sum,bs,cbs;
4543 
4544     if (n == PETSC_DECIDE) {
4545       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4546     }
4547     /* Check sum(n) = N */
4548     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4549     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4550 
4551     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4552     rstart -= m;
4553 
4554     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4555     for (i=0; i<m; i++) {
4556       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4557       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4558       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4559     }
4560 
4561     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4562     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4563     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4564     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4565     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4566     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4567     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4568     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4569   }
4570 
4571   /* numeric phase */
4572   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4573   for (i=0; i<m; i++) {
4574     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4575     Ii   = i + rstart;
4576     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4577     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4578   }
4579   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4580   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4581   PetscFunctionReturn(0);
4582 }
4583 
4584 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4585 {
4586   PetscErrorCode    ierr;
4587   PetscMPIInt       rank;
4588   PetscInt          m,N,i,rstart,nnz;
4589   size_t            len;
4590   const PetscInt    *indx;
4591   PetscViewer       out;
4592   char              *name;
4593   Mat               B;
4594   const PetscScalar *values;
4595 
4596   PetscFunctionBegin;
4597   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4598   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4599   /* Should this be the type of the diagonal block of A? */
4600   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4601   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4602   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4603   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4604   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4605   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4606   for (i=0; i<m; i++) {
4607     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4608     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4609     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4610   }
4611   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4613 
4614   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4615   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4616   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4617   sprintf(name,"%s.%d",outfile,rank);
4618   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4619   ierr = PetscFree(name);CHKERRQ(ierr);
4620   ierr = MatView(B,out);CHKERRQ(ierr);
4621   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4622   ierr = MatDestroy(&B);CHKERRQ(ierr);
4623   PetscFunctionReturn(0);
4624 }
4625 
4626 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4627 {
4628   PetscErrorCode      ierr;
4629   Mat_Merge_SeqsToMPI *merge;
4630   PetscContainer      container;
4631 
4632   PetscFunctionBegin;
4633   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4634   if (container) {
4635     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4636     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4637     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4638     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4639     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4640     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4641     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4642     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4643     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4644     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4645     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4646     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4647     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4648     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4649     ierr = PetscFree(merge);CHKERRQ(ierr);
4650     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4651   }
4652   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4653   PetscFunctionReturn(0);
4654 }
4655 
4656 #include <../src/mat/utils/freespace.h>
4657 #include <petscbt.h>
4658 
4659 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4660 {
4661   PetscErrorCode      ierr;
4662   MPI_Comm            comm;
4663   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4664   PetscMPIInt         size,rank,taga,*len_s;
4665   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4666   PetscInt            proc,m;
4667   PetscInt            **buf_ri,**buf_rj;
4668   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4669   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4670   MPI_Request         *s_waits,*r_waits;
4671   MPI_Status          *status;
4672   MatScalar           *aa=a->a;
4673   MatScalar           **abuf_r,*ba_i;
4674   Mat_Merge_SeqsToMPI *merge;
4675   PetscContainer      container;
4676 
4677   PetscFunctionBegin;
4678   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4679   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4680 
4681   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4682   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4683 
4684   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4685   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4686 
4687   bi     = merge->bi;
4688   bj     = merge->bj;
4689   buf_ri = merge->buf_ri;
4690   buf_rj = merge->buf_rj;
4691 
4692   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4693   owners = merge->rowmap->range;
4694   len_s  = merge->len_s;
4695 
4696   /* send and recv matrix values */
4697   /*-----------------------------*/
4698   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4699   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4700 
4701   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4702   for (proc=0,k=0; proc<size; proc++) {
4703     if (!len_s[proc]) continue;
4704     i    = owners[proc];
4705     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4706     k++;
4707   }
4708 
4709   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4710   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4711   ierr = PetscFree(status);CHKERRQ(ierr);
4712 
4713   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4714   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4715 
4716   /* insert mat values of mpimat */
4717   /*----------------------------*/
4718   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4719   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4720 
4721   for (k=0; k<merge->nrecv; k++) {
4722     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4723     nrows       = *(buf_ri_k[k]);
4724     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4725     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4726   }
4727 
4728   /* set values of ba */
4729   m = merge->rowmap->n;
4730   for (i=0; i<m; i++) {
4731     arow = owners[rank] + i;
4732     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4733     bnzi = bi[i+1] - bi[i];
4734     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4735 
4736     /* add local non-zero vals of this proc's seqmat into ba */
4737     anzi   = ai[arow+1] - ai[arow];
4738     aj     = a->j + ai[arow];
4739     aa     = a->a + ai[arow];
4740     nextaj = 0;
4741     for (j=0; nextaj<anzi; j++) {
4742       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4743         ba_i[j] += aa[nextaj++];
4744       }
4745     }
4746 
4747     /* add received vals into ba */
4748     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4749       /* i-th row */
4750       if (i == *nextrow[k]) {
4751         anzi   = *(nextai[k]+1) - *nextai[k];
4752         aj     = buf_rj[k] + *(nextai[k]);
4753         aa     = abuf_r[k] + *(nextai[k]);
4754         nextaj = 0;
4755         for (j=0; nextaj<anzi; j++) {
4756           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4757             ba_i[j] += aa[nextaj++];
4758           }
4759         }
4760         nextrow[k]++; nextai[k]++;
4761       }
4762     }
4763     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4764   }
4765   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4766   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4767 
4768   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4769   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4770   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4771   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4772   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4773   PetscFunctionReturn(0);
4774 }
4775 
4776 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4777 {
4778   PetscErrorCode      ierr;
4779   Mat                 B_mpi;
4780   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4781   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4782   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4783   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4784   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4785   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4786   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4787   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4788   MPI_Status          *status;
4789   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4790   PetscBT             lnkbt;
4791   Mat_Merge_SeqsToMPI *merge;
4792   PetscContainer      container;
4793 
4794   PetscFunctionBegin;
4795   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4796 
4797   /* make sure it is a PETSc comm */
4798   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4799   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4800   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4801 
4802   ierr = PetscNew(&merge);CHKERRQ(ierr);
4803   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4804 
4805   /* determine row ownership */
4806   /*---------------------------------------------------------*/
4807   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4808   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4809   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4810   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4811   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4812   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4813   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4814 
4815   m      = merge->rowmap->n;
4816   owners = merge->rowmap->range;
4817 
4818   /* determine the number of messages to send, their lengths */
4819   /*---------------------------------------------------------*/
4820   len_s = merge->len_s;
4821 
4822   len          = 0; /* length of buf_si[] */
4823   merge->nsend = 0;
4824   for (proc=0; proc<size; proc++) {
4825     len_si[proc] = 0;
4826     if (proc == rank) {
4827       len_s[proc] = 0;
4828     } else {
4829       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4830       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4831     }
4832     if (len_s[proc]) {
4833       merge->nsend++;
4834       nrows = 0;
4835       for (i=owners[proc]; i<owners[proc+1]; i++) {
4836         if (ai[i+1] > ai[i]) nrows++;
4837       }
4838       len_si[proc] = 2*(nrows+1);
4839       len         += len_si[proc];
4840     }
4841   }
4842 
4843   /* determine the number and length of messages to receive for ij-structure */
4844   /*-------------------------------------------------------------------------*/
4845   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4846   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4847 
4848   /* post the Irecv of j-structure */
4849   /*-------------------------------*/
4850   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4851   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4852 
4853   /* post the Isend of j-structure */
4854   /*--------------------------------*/
4855   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4856 
4857   for (proc=0, k=0; proc<size; proc++) {
4858     if (!len_s[proc]) continue;
4859     i    = owners[proc];
4860     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4861     k++;
4862   }
4863 
4864   /* receives and sends of j-structure are complete */
4865   /*------------------------------------------------*/
4866   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4867   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4868 
4869   /* send and recv i-structure */
4870   /*---------------------------*/
4871   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4872   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4873 
4874   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4875   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4876   for (proc=0,k=0; proc<size; proc++) {
4877     if (!len_s[proc]) continue;
4878     /* form outgoing message for i-structure:
4879          buf_si[0]:                 nrows to be sent
4880                [1:nrows]:           row index (global)
4881                [nrows+1:2*nrows+1]: i-structure index
4882     */
4883     /*-------------------------------------------*/
4884     nrows       = len_si[proc]/2 - 1;
4885     buf_si_i    = buf_si + nrows+1;
4886     buf_si[0]   = nrows;
4887     buf_si_i[0] = 0;
4888     nrows       = 0;
4889     for (i=owners[proc]; i<owners[proc+1]; i++) {
4890       anzi = ai[i+1] - ai[i];
4891       if (anzi) {
4892         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4893         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4894         nrows++;
4895       }
4896     }
4897     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4898     k++;
4899     buf_si += len_si[proc];
4900   }
4901 
4902   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4903   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4904 
4905   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4906   for (i=0; i<merge->nrecv; i++) {
4907     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4908   }
4909 
4910   ierr = PetscFree(len_si);CHKERRQ(ierr);
4911   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4912   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4913   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4914   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4915   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4916   ierr = PetscFree(status);CHKERRQ(ierr);
4917 
4918   /* compute a local seq matrix in each processor */
4919   /*----------------------------------------------*/
4920   /* allocate bi array and free space for accumulating nonzero column info */
4921   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4922   bi[0] = 0;
4923 
4924   /* create and initialize a linked list */
4925   nlnk = N+1;
4926   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4927 
4928   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4929   len  = ai[owners[rank+1]] - ai[owners[rank]];
4930   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4931 
4932   current_space = free_space;
4933 
4934   /* determine symbolic info for each local row */
4935   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4936 
4937   for (k=0; k<merge->nrecv; k++) {
4938     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4939     nrows       = *buf_ri_k[k];
4940     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4941     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4942   }
4943 
4944   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4945   len  = 0;
4946   for (i=0; i<m; i++) {
4947     bnzi = 0;
4948     /* add local non-zero cols of this proc's seqmat into lnk */
4949     arow  = owners[rank] + i;
4950     anzi  = ai[arow+1] - ai[arow];
4951     aj    = a->j + ai[arow];
4952     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4953     bnzi += nlnk;
4954     /* add received col data into lnk */
4955     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4956       if (i == *nextrow[k]) { /* i-th row */
4957         anzi  = *(nextai[k]+1) - *nextai[k];
4958         aj    = buf_rj[k] + *nextai[k];
4959         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4960         bnzi += nlnk;
4961         nextrow[k]++; nextai[k]++;
4962       }
4963     }
4964     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4965 
4966     /* if free space is not available, make more free space */
4967     if (current_space->local_remaining<bnzi) {
4968       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4969       nspacedouble++;
4970     }
4971     /* copy data into free space, then initialize lnk */
4972     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4973     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4974 
4975     current_space->array           += bnzi;
4976     current_space->local_used      += bnzi;
4977     current_space->local_remaining -= bnzi;
4978 
4979     bi[i+1] = bi[i] + bnzi;
4980   }
4981 
4982   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4983 
4984   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4985   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4986   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4987 
4988   /* create symbolic parallel matrix B_mpi */
4989   /*---------------------------------------*/
4990   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4991   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4992   if (n==PETSC_DECIDE) {
4993     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4994   } else {
4995     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4996   }
4997   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4998   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4999   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5000   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5001   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5002 
5003   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5004   B_mpi->assembled    = PETSC_FALSE;
5005   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5006   merge->bi           = bi;
5007   merge->bj           = bj;
5008   merge->buf_ri       = buf_ri;
5009   merge->buf_rj       = buf_rj;
5010   merge->coi          = NULL;
5011   merge->coj          = NULL;
5012   merge->owners_co    = NULL;
5013 
5014   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5015 
5016   /* attach the supporting struct to B_mpi for reuse */
5017   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5018   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5019   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5020   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5021   *mpimat = B_mpi;
5022 
5023   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5024   PetscFunctionReturn(0);
5025 }
5026 
5027 /*@C
5028       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5029                  matrices from each processor
5030 
5031     Collective
5032 
5033    Input Parameters:
5034 +    comm - the communicator the parallel matrix will live on
5035 .    seqmat - the input sequential matrix
5036 .    m - number of local rows (or PETSC_DECIDE)
5037 .    n - number of local columns (or PETSC_DECIDE)
5038 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5039 
5040    Output Parameter:
5041 .    mpimat - the parallel matrix generated
5042 
5043     Level: advanced
5044 
5045    Notes:
5046      The dimensions of the sequential matrix on each process MUST be the same.
5047      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5048      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
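
     A minimal usage sketch (variable names are illustrative); each process first
     assembles its own seqmat of identical dimensions, then calls
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve
     and, after changing the values (but not the nonzero pattern) of seqmat, reuses the symbolic data with
.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve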
5049 @*/
5050 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5051 {
5052   PetscErrorCode ierr;
5053   PetscMPIInt    size;
5054 
5055   PetscFunctionBegin;
5056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5057   if (size == 1) {
5058     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5059     if (scall == MAT_INITIAL_MATRIX) {
5060       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5061     } else {
5062       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5063     }
5064     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5065     PetscFunctionReturn(0);
5066   }
5067   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5068   if (scall == MAT_INITIAL_MATRIX) {
5069     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5070   }
5071   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5072   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5073   PetscFunctionReturn(0);
5074 }
5075 
5076 /*@
5077      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5078           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5079           with MatGetSize().
5080 
5081     Not Collective
5082 
5083    Input Parameters:
5084 +    A - the matrix
5085 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5086 
5087    Output Parameter:
5088 .    A_loc - the local sequential matrix generated
5089 
5090     Level: developer
5091 
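   Notes:
     A minimal usage sketch, assuming A is an assembled MATMPIAIJ matrix; the second
     call refreshes A_loc in place after the values (but not the pattern) of A change:
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     ierr = MatDestroy(&A_loc);
.ve
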
5092 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5093 
5094 @*/
5095 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5096 {
5097   PetscErrorCode ierr;
5098   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5099   Mat_SeqAIJ     *mat,*a,*b;
5100   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5101   MatScalar      *aa,*ba,*cam;
5102   PetscScalar    *ca;
5103   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5104   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5105   PetscBool      match;
5106   MPI_Comm       comm;
5107   PetscMPIInt    size;
5108 
5109   PetscFunctionBegin;
5110   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5111   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5112   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5113   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5114   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5115 
5116   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5117   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5118   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5119   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5120   aa = a->a; ba = b->a;
5121   if (scall == MAT_INITIAL_MATRIX) {
5122     if (size == 1) {
5123       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5124       ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0);
5125     }
5126 
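    /* Each row of the merged local matrix concatenates, in ascending global column order,
       the off-diagonal entries with global column < cstart, then the diagonal block
       (local columns shifted by cstart), then the remaining off-diagonal entries */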
5127     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5128     ci[0] = 0;
5129     for (i=0; i<am; i++) {
5130       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5131     }
5132     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5133     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5134     k    = 0;
5135     for (i=0; i<am; i++) {
5136       ncols_o = bi[i+1] - bi[i];
5137       ncols_d = ai[i+1] - ai[i];
5138       /* off-diagonal portion of A (global columns before the diagonal block) */
5139       for (jo=0; jo<ncols_o; jo++) {
5140         col = cmap[*bj];
5141         if (col >= cstart) break;
5142         cj[k]   = col; bj++;
5143         ca[k++] = *ba++;
5144       }
5145       /* diagonal portion of A */
5146       for (j=0; j<ncols_d; j++) {
5147         cj[k]   = cstart + *aj++;
5148         ca[k++] = *aa++;
5149       }
5150       /* off-diagonal portion of A (remaining global columns) */
5151       for (j=jo; j<ncols_o; j++) {
5152         cj[k]   = cmap[*bj++];
5153         ca[k++] = *ba++;
5154       }
5155     }
5156     /* put together the new matrix */
5157     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5158     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5159     /* Since these are PETSc arrays, change flags to free them as necessary. */
5160     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5161     mat->free_a  = PETSC_TRUE;
5162     mat->free_ij = PETSC_TRUE;
5163     mat->nonew   = 0;
5164   } else if (scall == MAT_REUSE_MATRIX) {
5165     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5166     ci = mat->i; cj = mat->j; cam = mat->a;
5167     for (i=0; i<am; i++) {
5168       /* off-diagonal portion of A (global columns before the diagonal block) */
5169       ncols_o = bi[i+1] - bi[i];
5170       for (jo=0; jo<ncols_o; jo++) {
5171         col = cmap[*bj];
5172         if (col >= cstart) break;
5173         *cam++ = *ba++; bj++;
5174       }
5175       /* diagonal portion of A */
5176       ncols_d = ai[i+1] - ai[i];
5177       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5178       /* off-diagonal portion of A (remaining global columns) */
5179       for (j=jo; j<ncols_o; j++) {
5180         *cam++ = *ba++; bj++;
5181       }
5182     }
5183   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5184   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5185   PetscFunctionReturn(0);
5186 }
5187 
5188 /*@C
5189      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5190 
5191     Not Collective
5192 
5193    Input Parameters:
5194 +    A - the matrix
5195 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5196 -    row, col - index sets of rows and columns to extract (or NULL)
5197 
5198    Output Parameter:
5199 .    A_loc - the local sequential matrix generated
5200 
5201     Level: developer
5202 
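   Notes:
     A minimal usage sketch; passing NULL for row and col selects all local rows and
     the nonzero columns of this process:
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     ierr = MatDestroy(&A_loc);
.ve
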
5203 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5204 
5205 @*/
5206 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5207 {
5208   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5209   PetscErrorCode ierr;
5210   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5211   IS             isrowa,iscola;
5212   Mat            *aloc;
5213   PetscBool      match;
5214 
5215   PetscFunctionBegin;
5216   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5217   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5218   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5219   if (!row) {
5220     start = A->rmap->rstart; end = A->rmap->rend;
5221     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5222   } else {
5223     isrowa = *row;
5224   }
5225   if (!col) {
5226     start = A->cmap->rstart;
5227     cmap  = a->garray;
5228     nzA   = a->A->cmap->n;
5229     nzB   = a->B->cmap->n;
5230     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5231     ncols = 0;
5232     for (i=0; i<nzB; i++) {
5233       if (cmap[i] < start) idx[ncols++] = cmap[i];
5234       else break;
5235     }
5236     imark = i;
5237     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5238     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5239     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5240   } else {
5241     iscola = *col;
5242   }
5243   if (scall != MAT_INITIAL_MATRIX) {
5244     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5245     aloc[0] = *A_loc;
5246   }
5247   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5248   if (!col) { /* attach global id of condensed columns */
5249     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5250   }
5251   *A_loc = aloc[0];
5252   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5253   if (!row) {
5254     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5255   }
5256   if (!col) {
5257     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5258   }
5259   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5260   PetscFunctionReturn(0);
5261 }
5262 
5263 /*
5264  * Destroy a matrix that may have PetscSF communication objects composed with it.
5265  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5266  * */
5267 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5268 {
5269   PetscSF          sf,osf;
5270   PetscErrorCode   ierr;
5271 
5272   PetscFunctionBegin;
5273   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5274   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5275   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5276   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5277   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5278   PetscFunctionReturn(0);
5279 }
5280 
5281 /*
5282  * Create a sequential AIJ matrix based on row indices; the entire row (all of its columns) is extracted once a row index is matched.
5283  * Rows may be local or remote. The routine is designed to be scalable in memory, so that nothing
5284  * depends on the global size.
5285  * */
5286 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5287 {
5288   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5289   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5290   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols;
5291   PetscSFNode              *iremote,*oiremote;
5292   const PetscInt           *lrowindices;
5293   PetscErrorCode           ierr;
5294   PetscSF                  sf,osf;
5295   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5296   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5297   MPI_Comm                 comm;
5298   ISLocalToGlobalMapping   mapping;
5299 
5300   PetscFunctionBegin;
5301   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5302   /* plocalsize is the number of roots
5303    * nrows is the number of leaves
5304    * */
5305   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5306   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5307   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5308   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5309   for (i=0;i<nrows;i++) {
5310     /* Find a remote index and an owner for a row
5311      * The row could be local or remote
5312      * */
5313     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5314     iremote[i].index = lidx;
5315     iremote[i].rank  = owner;
5316   }
5317   /* Create SF to communicate how many nonzero columns for each row */
5318   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5319   /* SF will figure out the number of nonzero columns for each row, and their
5320    * offsets
5321    * */
5322   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5323   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5324   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5325   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5326   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5327   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5328   roffsets[0] = 0;
5329   roffsets[1] = 0;
5330   for (i=0;i<plocalsize;i++) {
5331     /* diag */
5332     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5333     /* off diag */
5334     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5335     /* compute offsets so that we know the relative location of each row */
5336     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5337     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5338   }
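  /* A hypothetical example: two local rows with (diag,off-diag) lengths (2,1) and (0,3) give
     nrcols   = [2,1, 0,3]
     roffsets = [0,0, 2,1, 2,4]  (interleaved diag/off-diag prefix sums) */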
5339   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5340   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5341   /* 'r' means root, and 'l' means leaf */
5342   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5343   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5344   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5345   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5346   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5347   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5348   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5349   dntotalcols = 0;
5350   ontotalcols = 0;
5351   for (i=0;i<nrows;i++) {
5352     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5353     /* diag */
5354     dntotalcols += nlcols[i*2+0];
5355     /* off diag */
5356     ontotalcols += nlcols[i*2+1];
5357   }
5358   /* We do not need to figure out the right number of columns
5359    * since all the calculations will be done by going through the raw data
5360    * */
5361   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,nrows,0,pnnz,P_oth);CHKERRQ(ierr);
5362   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5363   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5364   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5365   /* diag */
5366   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5367   /* off diag */
5368   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5369   /* diag */
5370   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5371   /* off diag */
5372   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5373   dntotalcols = 0;
5374   ontotalcols = 0;
5375   ntotalcols  = 0;
5376   for (i=0;i<nrows;i++) {
5377     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5378     /* Set iremote for diag matrix */
5379     for (j=0;j<nlcols[i*2+0];j++) {
5380       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5381       iremote[dntotalcols].rank    = owner;
5382       /* P_oth is SeqAIJ, so ilocal needs to point into the first part of memory */
5383       ilocal[dntotalcols++]        = ntotalcols++;
5384     }
5385     /* off diag */
5386     for (j=0;j<nlcols[i*2+1];j++) {
5387       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5388       oiremote[ontotalcols].rank    = owner;
5389       oilocal[ontotalcols++]        = ntotalcols++;
5390     }
5391   }
5392   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5393   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5394   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5395   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5396   /* P serves as roots and P_oth as leaves
5397    * Diag matrix
5398    * */
5399   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5400   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5401   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5402 
5403   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5404   /* Off diag */
5405   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5406   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5407   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5408   /* We operate on the matrix internal data to save memory */
5409   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5410   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5411   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5412   /* Convert to global indices for diag matrix */
5413   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5414   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5415   /* We want P_oth to store global indices */
5416   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5417   /* Use memory scalable approach */
5418   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5419   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5420   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5421   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5422   /* Convert back to local indices */
5423   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5424   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5425   nout = 0;
5426   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5427   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5428   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5429   /* Exchange values */
5430   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5431   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5432   /* Stop PETSc from shrinking memory */
5433   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5434   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5435   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5436   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5437   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5438   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5439   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5440   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5441   PetscFunctionReturn(0);
5442 }
5443 
5444 /*
5445  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A.
5446  * This supports MPIAIJ and MAIJ.
5447  * */
5448 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,MatReuse reuse,Mat *P_oth)
5449 {
5450   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5451   Mat_SeqAIJ            *ao=(Mat_SeqAIJ*)(a->B)->data,*p_oth;
5452   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5453   IS                    rows;
5454   PetscHSetI            ht;
5455   PetscInt              i,htsize,*rowindices,off;
5456   MPI_Comm              comm;
5457   PetscSF               sf,osf;
5458   PetscErrorCode        ierr;
5459 
5460   PetscFunctionBegin;
5461   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5462   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5463    *  and then create a submatrix (that often is an overlapping matrix)
5464    * */
5465   if (reuse==MAT_INITIAL_MATRIX) {
5466     /* Use a hash table to figure out unique keys */
5467     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
5468     for (i=0;i<ao->i[a->B->rmap->n];i++) {
5469       /* Convert to global keys */
5470       ierr = PetscHSetIAdd(ht,a->garray[ao->j[i]]);CHKERRQ(ierr);
5471     }
5472     ierr = PetscHSetIGetSize(ht,&htsize);CHKERRQ(ierr);
5473     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5474     off = 0;
5475     ierr = PetscHSetIGetElems(ht,&off,rowindices);CHKERRQ(ierr);
5476     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
5477     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5478     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5479     /* In case the matrix was already created but the user wants to recreate it */
5480     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5481     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5482     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5483   } else if (reuse==MAT_REUSE_MATRIX) {
5484     /* If the matrix was already created, we simply update values using the SF objects
5485      * that were attached to the matrix earlier.
5486      *  */
5487     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5488     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5489     if (!sf || !osf) {
5490       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5491     }
5492     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5493     /* Update values in place */
5494     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5495     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5496     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5497     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5498   } else {
5499     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5500   }
5501 
5502   PetscFunctionReturn(0);
5503 }
5504 
5505 /*@C
5506     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5507 
5508     Collective on Mat
5509 
5510    Input Parameters:
5511 +    A,B - the matrices in mpiaij format
5512 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5513 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5514 
5515    Output Parameter:
5516 +    rowb, colb - index sets of rows and columns of B to extract
5517 -    B_seq - the sequential matrix generated
5518 
5519     Level: developer
5520 
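   Notes:
     A minimal usage sketch; with MAT_INITIAL_MATRIX the index sets are created and
     returned, must be passed back unchanged for MAT_REUSE_MATRIX, and are destroyed
     by the caller at the end:
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
     ierr = ISDestroy(&rowb);
     ierr = ISDestroy(&colb);
     ierr = MatDestroy(&B_seq);
.ve
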
5521 @*/
5522 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5523 {
5524   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5525   PetscErrorCode ierr;
5526   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5527   IS             isrowb,iscolb;
5528   Mat            *bseq=NULL;
5529 
5530   PetscFunctionBegin;
5531   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5532     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5533   }
5534   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5535 
5536   if (scall == MAT_INITIAL_MATRIX) {
5537     start = A->cmap->rstart;
5538     cmap  = a->garray;
5539     nzA   = a->A->cmap->n;
5540     nzB   = a->B->cmap->n;
5541     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5542     ncols = 0;
5543     for (i=0; i<nzB; i++) {  /* row < local row index */
5544       if (cmap[i] < start) idx[ncols++] = cmap[i];
5545       else break;
5546     }
5547     imark = i;
5548     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5549     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5550     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5551     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5552   } else {
5553     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5554     isrowb  = *rowb; iscolb = *colb;
5555     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5556     bseq[0] = *B_seq;
5557   }
5558   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5559   *B_seq = bseq[0];
5560   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5561   if (!rowb) {
5562     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5563   } else {
5564     *rowb = isrowb;
5565   }
5566   if (!colb) {
5567     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5568   } else {
5569     *colb = iscolb;
5570   }
5571   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5572   PetscFunctionReturn(0);
5573 }
5574 
5575 /*
5576     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5577     of the OFF-DIAGONAL portion of local A
5578 
5579     Collective on Mat
5580 
5581    Input Parameters:
5582 +    A,B - the matrices in mpiaij format
5583 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5584 
5585    Output Parameter:
5586 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5587 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5588 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5589 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5590 
5591     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5592      for this matrix. This is not desirable.
5593 
5594     Level: developer
5595 
5596 */
5597 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5598 {
5599   PetscErrorCode         ierr;
5600   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5601   Mat_SeqAIJ             *b_oth;
5602   VecScatter             ctx;
5603   MPI_Comm               comm;
5604   const PetscMPIInt      *rprocs,*sprocs;
5605   const PetscInt         *srow,*rstarts,*sstarts;
5606   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5607   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5608   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5609   MPI_Request            *rwaits = NULL,*swaits = NULL;
5610   MPI_Status             rstatus;
5611   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5612 
5613   PetscFunctionBegin;
5614   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5615   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5616 
5617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5618     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5619   }
5620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5622 
5623   if (size == 1) {
5624     startsj_s = NULL;
5625     bufa_ptr  = NULL;
5626     *B_oth    = NULL;
5627     ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); PetscFunctionReturn(0);
5628   }
5629 
5630   ctx = a->Mvctx;
5631   tag = ((PetscObject)ctx)->tag;
5632 
5633   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5634   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5635   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5636   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5637   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5638   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5639   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5640 
5641   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5642   if (scall == MAT_INITIAL_MATRIX) {
5643     /* i-array */
5644     /*---------*/
5645     /*  post receives */
5646     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5647     for (i=0; i<nrecvs; i++) {
5648       rowlen = rvalues + rstarts[i]*rbs;
5649       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5650       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5651     }
5652 
5653     /* pack the outgoing message */
5654     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5655 
5656     sstartsj[0] = 0;
5657     rstartsj[0] = 0;
5658     len         = 0; /* total length of j or a array to be sent */
5659     if (nsends) {
5660       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5661       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5662     }
5663     for (i=0; i<nsends; i++) {
5664       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5665       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5666       for (j=0; j<nrows; j++) {
5667         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5668         for (l=0; l<sbs; l++) {
5669           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5670 
5671           rowlen[j*sbs+l] = ncols;
5672 
5673           len += ncols;
5674           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5675         }
5676         k++;
5677       }
5678       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5679 
5680       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5681     }
5682     /* recvs and sends of i-array are completed */
5683     i = nrecvs;
5684     while (i--) {
5685       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5686     }
5687     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5688     ierr = PetscFree(svalues);CHKERRQ(ierr);
5689 
5690     /* allocate buffers for sending j and a arrays */
5691     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5692     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5693 
5694     /* create i-array of B_oth */
5695     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5696 
5697     b_othi[0] = 0;
5698     len       = 0; /* total length of j or a array to be received */
5699     k         = 0;
5700     for (i=0; i<nrecvs; i++) {
5701       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5702       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5703       for (j=0; j<nrows; j++) {
5704         b_othi[k+1] = b_othi[k] + rowlen[j];
5705         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5706         k++;
5707       }
5708       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5709     }
5710     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5711 
5712     /* allocate space for j and a arrays of B_oth */
5713     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5714     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5715 
5716     /* j-array */
5717     /*---------*/
5718     /*  post receives of j-array */
5719     for (i=0; i<nrecvs; i++) {
5720       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5721       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5722     }
5723 
5724     /* pack the outgoing message j-array */
5725     if (nsends) k = sstarts[0];
5726     for (i=0; i<nsends; i++) {
5727       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5728       bufJ  = bufj+sstartsj[i];
5729       for (j=0; j<nrows; j++) {
5730         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5731         for (ll=0; ll<sbs; ll++) {
5732           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5733           for (l=0; l<ncols; l++) {
5734             *bufJ++ = cols[l];
5735           }
5736           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5737         }
5738       }
5739       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5740     }
5741 
5742     /* recvs and sends of j-array are completed */
5743     i = nrecvs;
5744     while (i--) {
5745       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5746     }
5747     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5748   } else if (scall == MAT_REUSE_MATRIX) {
5749     sstartsj = *startsj_s;
5750     rstartsj = *startsj_r;
5751     bufa     = *bufa_ptr;
5752     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5753     b_otha   = b_oth->a;
5754   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
5755 
5756   /* a-array */
5757   /*---------*/
5758   /*  post receives of a-array */
5759   for (i=0; i<nrecvs; i++) {
5760     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5761     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5762   }
5763 
5764   /* pack the outgoing message a-array */
5765   if (nsends) k = sstarts[0];
5766   for (i=0; i<nsends; i++) {
5767     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5768     bufA  = bufa+sstartsj[i];
5769     for (j=0; j<nrows; j++) {
5770       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5771       for (ll=0; ll<sbs; ll++) {
5772         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5773         for (l=0; l<ncols; l++) {
5774           *bufA++ = vals[l];
5775         }
5776         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5777       }
5778     }
5779     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5780   }
5781   /* recvs and sends of a-array are completed */
5782   i = nrecvs;
5783   while (i--) {
5784     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5785   }
5786   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5787   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5788 
5789   if (scall == MAT_INITIAL_MATRIX) {
5790     /* put together the new matrix */
5791     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5792 
5793     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5794     /* Since these are PETSc arrays, change flags to free them as necessary. */
5795     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5796     b_oth->free_a  = PETSC_TRUE;
5797     b_oth->free_ij = PETSC_TRUE;
5798     b_oth->nonew   = 0;
5799 
5800     ierr = PetscFree(bufj);CHKERRQ(ierr);
5801     if (!startsj_s || !bufa_ptr) {
5802       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5803       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5804     } else {
5805       *startsj_s = sstartsj;
5806       *startsj_r = rstartsj;
5807       *bufa_ptr  = bufa;
5808     }
5809   }
5810 
5811   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5812   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5813   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5814   PetscFunctionReturn(0);
5815 }
5816 
5817 /*@C
5818   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5819 
5820   Not Collective
5821 
5822   Input Parameters:
5823 . A - The matrix in mpiaij format
5824 
5825   Output Parameter:
5826 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5827 . colmap - A map from global column index to local index into lvec
5828 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5829 
5830   Level: developer
5831 
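  Notes:
    A minimal usage sketch; the returned objects are owned by the matrix and must not
    be destroyed by the caller (the type of colmap depends on PETSC_USE_CTABLE; the
    non-CTABLE variant is shown):
.vb
    Vec        lvec;
    VecScatter Mvctx;
    PetscInt   *colmap;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
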
5832 @*/
5833 #if defined(PETSC_USE_CTABLE)
5834 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5835 #else
5836 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5837 #endif
5838 {
5839   Mat_MPIAIJ *a;
5840 
5841   PetscFunctionBegin;
5842   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5843   PetscValidPointer(lvec, 2);
5844   PetscValidPointer(colmap, 3);
5845   PetscValidPointer(multScatter, 4);
5846   a = (Mat_MPIAIJ*) A->data;
5847   if (lvec) *lvec = a->lvec;
5848   if (colmap) *colmap = a->colmap;
5849   if (multScatter) *multScatter = a->Mvctx;
5850   PetscFunctionReturn(0);
5851 }
5852 
5853 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5854 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5855 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5856 #if defined(PETSC_HAVE_MKL_SPARSE)
5857 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5858 #endif
5859 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5860 #if defined(PETSC_HAVE_ELEMENTAL)
5861 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5862 #endif
5863 #if defined(PETSC_HAVE_HYPRE)
5864 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5865 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5866 #endif
5867 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5868 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5869 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5870 
5871 /*
5872     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5873 
5874                n                       p                          p
5875         (              )       (              )         (                  )
5876       m (      A       )  *  n (       B      )   =   m (         C        )
5877         (              )       (              )         (                  )
5878 
5879 */
5880 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5881 {
5882   PetscErrorCode ierr;
5883   Mat            At,Bt,Ct;
5884 
5885   PetscFunctionBegin;
5886   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5887   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5888   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5889   ierr = MatDestroy(&At);CHKERRQ(ierr);
5890   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5891   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5892   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5893   PetscFunctionReturn(0);
5894 }
5895 
5896 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5897 {
5898   PetscErrorCode ierr;
5899   PetscInt       m=A->rmap->n,n=B->cmap->n;
5900   Mat            Cmat;
5901 
5902   PetscFunctionBegin;
5903   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5904   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5905   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5906   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5907   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5908   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5909   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5910   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5911 
5912   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5913 
5914   *C = Cmat;
5915   PetscFunctionReturn(0);
5916 }
5917 
5918 /* ----------------------------------------------------------------*/
5919 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5920 {
5921   PetscErrorCode ierr;
5922 
5923   PetscFunctionBegin;
5924   if (scall == MAT_INITIAL_MATRIX) {
5925     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5926     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5927     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5928   }
5929   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5930   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5931   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5932   PetscFunctionReturn(0);
5933 }
5934 
5935 /*MC
5936    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5937 
5938    Options Database Keys:
5939 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5940 
5941   Level: beginner
5942 
5943 .seealso: MatCreateAIJ()
5944 M*/
5945 
5946 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5947 {
5948   Mat_MPIAIJ     *b;
5949   PetscErrorCode ierr;
5950   PetscMPIInt    size;
5951 
5952   PetscFunctionBegin;
5953   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5954 
5955   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5956   B->data       = (void*)b;
5957   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5958   B->assembled  = PETSC_FALSE;
5959   B->insertmode = NOT_SET_VALUES;
5960   b->size       = size;
5961 
5962   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5963 
5964   /* build stash for off-processor entries generated during MatSetValues() */
5965   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5966 
5967   b->donotstash  = PETSC_FALSE;
5968   b->colmap      = 0;
5969   b->garray      = 0;
5970   b->roworiented = PETSC_TRUE;
5971 
5972   /* stuff used for matrix vector multiply */
5973   b->lvec  = NULL;
5974   b->Mvctx = NULL;
5975 
5976   /* stuff for MatGetRow() */
5977   b->rowindices   = 0;
5978   b->rowvalues    = 0;
5979   b->getrowactive = PETSC_FALSE;
5980 
5981   /* flexible pointer used in CUSP/CUSPARSE classes */
5982   b->spptr = NULL;
5983 
5984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5986   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5987   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5988   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5989   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5990   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5991   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5992   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5993   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5994 #if defined(PETSC_HAVE_MKL_SPARSE)
5995   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5996 #endif
5997   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5998   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5999 #if defined(PETSC_HAVE_ELEMENTAL)
6000   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6001 #endif
6002 #if defined(PETSC_HAVE_HYPRE)
6003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6004 #endif
6005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6010 #if defined(PETSC_HAVE_HYPRE)
6011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6012 #endif
6013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6014   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6015   PetscFunctionReturn(0);
6016 }
6017 
6018 /*@C
6019      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6020          and "off-diagonal" part of the matrix in CSR format.
6021 
6022    Collective
6023 
6024    Input Parameters:
6025 +  comm - MPI communicator
6026 .  m - number of local rows (Cannot be PETSC_DECIDE)
6027 .  n - This value should be the same as the local size used in creating the
6028        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6029        calculated if N is given). For square matrices n is almost always m.
6030 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6031 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6032 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6033 .   j - column indices
6034 .   a - matrix values
6035 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6036 .   oj - column indices
6037 -   oa - matrix values
6038 
6039    Output Parameter:
6040 .   mat - the matrix
6041 
6042    Level: advanced
6043 
6044    Notes:
6045        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6046        must free the arrays once the matrix has been destroyed and not before.
6047 
6048        The i and j indices are 0 based
6049 
6050        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6051 
6052        This sets local rows and cannot be used to set off-processor values.
6053 
6054        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6055        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6056        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6057        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6058        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6059        communication if it is known that only local entries will be set.
6060 
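       A minimal usage sketch; i,j,a and oi,oj,oa are caller-owned CSR arrays for the
       "diagonal" and "off-diagonal" blocks (the off-diagonal column indices oj are
       global), and may be freed only after the matrix is destroyed:
.vb
       Mat mat;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);
       ierr = MatDestroy(&mat);
.ve
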
6061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6062           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6063 @*/
6064 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6065 {
6066   PetscErrorCode ierr;
6067   Mat_MPIAIJ     *maij;
6068 
6069   PetscFunctionBegin;
6070   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
6071   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6072   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6073   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6074   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6075   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6076   maij = (Mat_MPIAIJ*) (*mat)->data;
6077 
6078   (*mat)->preallocated = PETSC_TRUE;
6079 
6080   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6081   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6082 
6083   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6084   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6085 
6086   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6087   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6088   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6089   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6090 
6091   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6092   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6093   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6094   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6095   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6096   PetscFunctionReturn(0);
6097 }
6098 
6099 /*
6100     Special version for direct calls from Fortran
6101 */
6102 #include <petsc/private/fortranimpl.h>
6103 
6104 /* Change these macros so they can be used in a void function */
6105 #undef CHKERRQ
6106 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6107 #undef SETERRQ2
6108 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6109 #undef SETERRQ3
6110 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6111 #undef SETERRQ
6112 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6113 
6114 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6115 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6116 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6117 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6118 #else
6119 #endif
6120 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6121 {
6122   Mat            mat  = *mmat;
6123   PetscInt       m    = *mm, n = *mn;
6124   InsertMode     addv = *maddv;
6125   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6126   PetscScalar    value;
6127   PetscErrorCode ierr;
6128 
6129   MatCheckPreallocated(mat,1);
6130   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6131 
6132 #if defined(PETSC_USE_DEBUG)
6133   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6134 #endif
6135   {
6136     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6137     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6138     PetscBool roworiented = aij->roworiented;
6139 
6140     /* Some Variables required in the macro */
6141     Mat        A                 = aij->A;
6142     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6143     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6144     MatScalar  *aa               = a->a;
6145     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6146     Mat        B                 = aij->B;
6147     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6148     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6149     MatScalar  *ba               = b->a;
6150 
6151     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6152     PetscInt  nonew = a->nonew;
6153     MatScalar *ap1,*ap2;
6154 
6155     PetscFunctionBegin;
6156     for (i=0; i<m; i++) {
6157       if (im[i] < 0) continue;
6158 #if defined(PETSC_USE_DEBUG)
6159       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6160 #endif
6161       if (im[i] >= rstart && im[i] < rend) {
6162         row      = im[i] - rstart;
6163         lastcol1 = -1;
6164         rp1      = aj + ai[row];
6165         ap1      = aa + ai[row];
6166         rmax1    = aimax[row];
6167         nrow1    = ailen[row];
6168         low1     = 0;
6169         high1    = nrow1;
6170         lastcol2 = -1;
6171         rp2      = bj + bi[row];
6172         ap2      = ba + bi[row];
6173         rmax2    = bimax[row];
6174         nrow2    = bilen[row];
6175         low2     = 0;
6176         high2    = nrow2;
6177 
6178         for (j=0; j<n; j++) {
6179           if (roworiented) value = v[i*n+j];
6180           else value = v[i+j*m];
6181           if (in[j] >= cstart && in[j] < cend) {
6182             col = in[j] - cstart;
6183             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6184             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6185           } else if (in[j] < 0) continue;
6186 #if defined(PETSC_USE_DEBUG)
6187           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6188           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6189 #endif
6190           else {
6191             if (mat->was_assembled) {
6192               if (!aij->colmap) {
6193                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6194               }
6195 #if defined(PETSC_USE_CTABLE)
6196               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6197               col--;
6198 #else
6199               col = aij->colmap[in[j]] - 1;
6200 #endif
6201               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6202               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6203                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6204                 col  =  in[j];
6205                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6206                 B     = aij->B;
6207                 b     = (Mat_SeqAIJ*)B->data;
6208                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6209                 rp2   = bj + bi[row];
6210                 ap2   = ba + bi[row];
6211                 rmax2 = bimax[row];
6212                 nrow2 = bilen[row];
6213                 low2  = 0;
6214                 high2 = nrow2;
6215                 bm    = aij->B->rmap->n;
6216                 ba    = b->a;
6217               }
6218             } else col = in[j];
6219             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6220           }
6221         }
6222       } else if (!aij->donotstash) {
6223         if (roworiented) {
6224           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6225         } else {
6226           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6227         }
6228       }
6229     }
6230   }
6231   PetscFunctionReturnVoid();
6232 }
6233