xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8dd7b8de55100f02b4a49c7581bb3bd4e4b5a7df)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
24    enough inodes exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
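/*
   Example (a minimal sketch, not taken from this file; the sizes and nonzero estimates are
   illustrative assumptions only): creating an AIJ matrix that works with one or many MPI
   processes by calling both preallocation routines, as recommended above.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used when the communicator has one process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used when it has more than one
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/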
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
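/*
   A brief sketch (illustrative, not from this file): the aijcrl format is most easily selected
   at runtime, so the assembly code stays identical to the MATAIJ example above.

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     then run the program with:  -mat_type aijcrl
*/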
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
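/* Builds an index set (in global numbering) of the locally owned rows that contain at least one
   stored nonzero value; if every row of the parallel matrix is nonzero, *keptrows is left NULL */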
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
218   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
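/*
   A hedged usage sketch (the names gseq, mlocal, and pmat are illustrative, not from this file):
   gseq is a square MATSEQAIJ matrix whose entries on rank 0 hold the whole system, and mlocal is
   the number of rows this rank should own.

     Mat pmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&pmat);CHKERRQ(ierr);
     ...
     later, with an unchanged nonzero pattern, only the numerical values are re-shipped:
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&pmat);CHKERRQ(ierr);
*/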
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the number of diagonal and off-diagonal entries in each row */
283       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the number of diagonal and off-diagonal entries in each row */
316       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
406 a slightly higher hash table lookup cost; without it, it is not scalable (each process
407 stores an order-N integer array) but access is fast.
408 */
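/*
   Illustrative example (assumed values): in the non-CTABLE branch below, if this rank's
   off-diagonal part has garray = {3, 9, 14}, then colmap[3] = 1, colmap[9] = 2, colmap[14] = 3,
   and every other entry stays 0, so global column g corresponds to local off-diagonal column
   colmap[g]-1 and a stored value of 0 means "not present on this rank".
*/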
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) { \
444             ap1[_i] += value;   \
445             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
446             (void)PetscLogFlops(1.0);   \
447            } \
448           else                    ap1[_i] = value; \
449           goto a_noinsert; \
450         } \
451       }  \
452       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
453       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
454       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
455       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
456       N = nrow1++ - 1; a->nz++; high1++; \
457       /* shift up all the later entries in this row */ \
458       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
459       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
460       rp1[_i] = col;  \
461       ap1[_i] = value;  \
462       A->nonzerostate++;\
463       a_noinsert: ; \
464       ailen[row] = nrow1; \
465 }
466 
467 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
468   { \
469     if (col <= lastcol2) low2 = 0;                        \
470     else high2 = nrow2;                                   \
471     lastcol2 = col;                                       \
472     while (high2-low2 > 5) {                              \
473       t = (low2+high2)/2;                                 \
474       if (rp2[t] > col) high2 = t;                        \
475       else             low2  = t;                         \
476     }                                                     \
477     for (_i=low2; _i<high2; _i++) {                       \
478       if (rp2[_i] > col) break;                           \
479       if (rp2[_i] == col) {                               \
480         if (addv == ADD_VALUES) {                         \
481           ap2[_i] += value;                               \
482           (void)PetscLogFlops(1.0);                       \
483         }                                                 \
484         else                    ap2[_i] = value;          \
485         goto b_noinsert;                                  \
486       }                                                   \
487     }                                                     \
488     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
489     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
490     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
491     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
492     N = nrow2++ - 1; b->nz++; high2++;                    \
493     /* shift up all the later entries in this row */      \
494     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
495     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
496     rp2[_i] = col;                                        \
497     ap2[_i] = value;                                      \
498     B->nonzerostate++;                                    \
499     b_noinsert: ;                                         \
500     bilen[row] = nrow2;                                   \
501   }
502 
503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
504 {
505   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
506   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
507   PetscErrorCode ierr;
508   PetscInt       l,*garray = mat->garray,diag;
509 
510   PetscFunctionBegin;
511   /* code only works for square matrices A */
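  /* v must contain the complete row in ascending global column order: the off-diagonal (B)
     entries left of this rank's diagonal block, then the diagonal-block (A) entries, then the
     off-diagonal (B) entries to the right of the diagonal block */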
512 
513   /* find size of row to the left of the diagonal part */
514   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
515   row  = row - diag;
516   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
517     if (garray[b->j[b->i[row]+l]] > diag) break;
518   }
519   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
520 
521   /* diagonal part */
522   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
523 
524   /* right of diagonal part */
525   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
530 {
531   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
532   PetscScalar    value;
533   PetscErrorCode ierr;
534   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
535   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
536   PetscBool      roworiented = aij->roworiented;
537 
538   /* Some Variables required in the macro */
539   Mat        A                 = aij->A;
540   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
541   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
542   MatScalar  *aa               = a->a;
543   PetscBool  ignorezeroentries = a->ignorezeroentries;
544   Mat        B                 = aij->B;
545   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
546   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
547   MatScalar  *ba               = b->a;
548 
549   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
550   PetscInt  nonew;
551   MatScalar *ap1,*ap2;
552 
553   PetscFunctionBegin;
554   for (i=0; i<m; i++) {
555     if (im[i] < 0) continue;
556 #if defined(PETSC_USE_DEBUG)
557     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
558 #endif
559     if (im[i] >= rstart && im[i] < rend) {
560       row      = im[i] - rstart;
561       lastcol1 = -1;
562       rp1      = aj + ai[row];
563       ap1      = aa + ai[row];
564       rmax1    = aimax[row];
565       nrow1    = ailen[row];
566       low1     = 0;
567       high1    = nrow1;
568       lastcol2 = -1;
569       rp2      = bj + bi[row];
570       ap2      = ba + bi[row];
571       rmax2    = bimax[row];
572       nrow2    = bilen[row];
573       low2     = 0;
574       high2    = nrow2;
575 
576       for (j=0; j<n; j++) {
577         if (roworiented) value = v[i*n+j];
578         else             value = v[i+j*m];
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
583           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
584         } else if (in[j] < 0) continue;
585 #if defined(PETSC_USE_DEBUG)
586         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
587 #endif
588         else {
589           if (mat->was_assembled) {
590             if (!aij->colmap) {
591               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
592             }
593 #if defined(PETSC_USE_CTABLE)
594             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
595             col--;
596 #else
597             col = aij->colmap[in[j]] - 1;
598 #endif
599             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
600               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
601               col  =  in[j];
602               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
603               B     = aij->B;
604               b     = (Mat_SeqAIJ*)B->data;
605               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
606               rp2   = bj + bi[row];
607               ap2   = ba + bi[row];
608               rmax2 = bimax[row];
609               nrow2 = bilen[row];
610               low2  = 0;
611               high2 = nrow2;
612               bm    = aij->B->rmap->n;
613               ba    = b->a;
614             } else if (col < 0) {
615               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
616                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
617               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
618             }
619           } else col = in[j];
620           nonew = b->nonew;
621           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
622         }
623       }
624     } else {
625       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
626       if (!aij->donotstash) {
627         mat->assembled = PETSC_FALSE;
628         if (roworiented) {
629           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
630         } else {
631           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
632         }
633       }
634     }
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 /*
640     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
641     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
642     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
643 */
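/*
   Illustrative example (assumed values): with cstart = 4 and cend = 8 on this rank, a row whose
   global columns are mat_j = {1, 5, 6, 9} is split into the off-diagonal part (columns 1 and 9,
   kept as global indices at this stage) and the diagonal part (columns 5 and 6, stored shifted
   by cstart as 1 and 2), giving ailen = 2 and bilen = 2 for that row.
*/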
644 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
645 {
646   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
647   Mat            A           = aij->A; /* diagonal part of the matrix */
648   Mat            B           = aij->B; /* offdiagonal part of the matrix */
649   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
650   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
651   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
652   PetscInt       *ailen      = a->ilen,*aj = a->j;
653   PetscInt       *bilen      = b->ilen,*bj = b->j;
654   PetscInt       am          = aij->A->rmap->n,j;
655   PetscInt       diag_so_far = 0,dnz;
656   PetscInt       offd_so_far = 0,onz;
657 
658   PetscFunctionBegin;
659   /* Iterate over all rows of the matrix */
660   for (j=0; j<am; j++) {
661     dnz = onz = 0;
662     /*  Iterate over all non-zero columns of the current row */
663     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
664       /* If column is in the diagonal */
665       if (mat_j[col] >= cstart && mat_j[col] < cend) {
666         aj[diag_so_far++] = mat_j[col] - cstart;
667         dnz++;
668       } else { /* off-diagonal entries */
669         bj[offd_so_far++] = mat_j[col];
670         onz++;
671       }
672     }
673     ailen[j] = dnz;
674     bilen[j] = onz;
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 /*
680     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
681     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
682     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
683     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
684     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
685 */
686 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
687 {
688   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
689   Mat            A      = aij->A; /* diagonal part of the matrix */
690   Mat            B      = aij->B; /* offdiagonal part of the matrix */
691   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
692   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
693   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
694   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
695   PetscInt       *ailen = a->ilen,*aj = a->j;
696   PetscInt       *bilen = b->ilen,*bj = b->j;
697   PetscInt       am     = aij->A->rmap->n,j;
698   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
699   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
700   PetscScalar    *aa = a->a,*ba = b->a;
701 
702   PetscFunctionBegin;
703   /* Iterate over all rows of the matrix */
704   for (j=0; j<am; j++) {
705     dnz_row = onz_row = 0;
706     rowstart_offd = full_offd_i[j];
707     rowstart_diag = full_diag_i[j];
708     /*  Iterate over all non-zero columns of the current row */
709     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
710       /* If column is in the diagonal */
711       if (mat_j[col] >= cstart && mat_j[col] < cend) {
712         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
713         aa[rowstart_diag+dnz_row] = mat_a[col];
714         dnz_row++;
715       } else { /* off-diagonal entries */
716         bj[rowstart_offd+onz_row] = mat_j[col];
717         ba[rowstart_offd+onz_row] = mat_a[col];
718         onz_row++;
719       }
720     }
721     ailen[j] = dnz_row;
722     bilen[j] = onz_row;
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
728 {
729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
730   PetscErrorCode ierr;
731   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
732   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
733 
734   PetscFunctionBegin;
735   for (i=0; i<m; i++) {
736     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
737     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
738     if (idxm[i] >= rstart && idxm[i] < rend) {
739       row = idxm[i] - rstart;
740       for (j=0; j<n; j++) {
741         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
742         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
743         if (idxn[j] >= cstart && idxn[j] < cend) {
744           col  = idxn[j] - cstart;
745           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
746         } else {
747           if (!aij->colmap) {
748             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
749           }
750 #if defined(PETSC_USE_CTABLE)
751           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
752           col--;
753 #else
754           col = aij->colmap[idxn[j]] - 1;
755 #endif
756           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
757           else {
758             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
759           }
760         }
761       }
762     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
763   }
764   PetscFunctionReturn(0);
765 }
766 
767 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
768 
769 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
770 {
771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
772   PetscErrorCode ierr;
773   PetscInt       nstash,reallocs;
774 
775   PetscFunctionBegin;
776   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
777 
778   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
779   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
780   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
784 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
785 {
786   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
787   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
788   PetscErrorCode ierr;
789   PetscMPIInt    n;
790   PetscInt       i,j,rstart,ncols,flg;
791   PetscInt       *row,*col;
792   PetscBool      other_disassembled;
793   PetscScalar    *val;
794 
795   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
796 
797   PetscFunctionBegin;
798   if (!aij->donotstash && !mat->nooffprocentries) {
799     while (1) {
800       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
801       if (!flg) break;
802 
803       for (i=0; i<n; ) {
804         /* Now identify the consecutive vals belonging to the same row */
805         for (j=i,rstart=row[j]; j<n; j++) {
806           if (row[j] != rstart) break;
807         }
808         if (j < n) ncols = j-i;
809         else       ncols = n-i;
810         /* Now assemble all these values with a single function call */
811         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
812 
813         i = j;
814       }
815     }
816     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
817   }
818   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
819   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
820 
821   /* determine if any processor has disassembled; if so, we must
822      also disassemble ourselves so that we may reassemble. */
823   /*
824      if nonzero structure of submatrix B cannot change then we know that
825      no processor disassembled thus we can skip this stuff
826   */
827   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
828     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
829     if (mat->was_assembled && !other_disassembled) {
830       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
831     }
832   }
833   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
834     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
835   }
836   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
839 
840   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
841 
842   aij->rowvalues = 0;
843 
844   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
845   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
846 
847   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
848   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
849     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
850     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
856 {
857   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
858   PetscErrorCode ierr;
859 
860   PetscFunctionBegin;
861   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
862   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
867 {
868   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
869   PetscObjectState sA, sB;
870   PetscInt        *lrows;
871   PetscInt         r, len;
872   PetscBool        cong, lch, gch;
873   PetscErrorCode   ierr;
874 
875   PetscFunctionBegin;
876   /* get locally owned rows */
877   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
878   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
879   /* fix right hand side if needed */
880   if (x && b) {
881     const PetscScalar *xx;
882     PetscScalar       *bb;
883 
884     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
885     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
886     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
887     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
888     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
889     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
890   }
891 
892   sA = mat->A->nonzerostate;
893   sB = mat->B->nonzerostate;
894 
895   if (diag != 0.0 && cong) {
896     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
897     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
898   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
899     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
900     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
901     PetscInt   nnwA, nnwB;
902     PetscBool  nnzA, nnzB;
903 
904     nnwA = aijA->nonew;
905     nnwB = aijB->nonew;
906     nnzA = aijA->keepnonzeropattern;
907     nnzB = aijB->keepnonzeropattern;
908     if (!nnzA) {
909       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
910       aijA->nonew = 0;
911     }
912     if (!nnzB) {
913       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
914       aijB->nonew = 0;
915     }
916     /* Must zero here before the next loop */
917     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
918     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
919     for (r = 0; r < len; ++r) {
920       const PetscInt row = lrows[r] + A->rmap->rstart;
921       if (row >= A->cmap->N) continue;
922       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
923     }
924     aijA->nonew = nnwA;
925     aijB->nonew = nnwB;
926   } else {
927     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
928     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
929   }
930   ierr = PetscFree(lrows);CHKERRQ(ierr);
931   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
933 
934   /* reduce nonzerostate */
935   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
936   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
937   if (gch) A->nonzerostate++;
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
942 {
943   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
944   PetscErrorCode    ierr;
945   PetscMPIInt       n = A->rmap->n;
946   PetscInt          i,j,r,m,p = 0,len = 0;
947   PetscInt          *lrows,*owners = A->rmap->range;
948   PetscSFNode       *rrows;
949   PetscSF           sf;
950   const PetscScalar *xx;
951   PetscScalar       *bb,*mask;
952   Vec               xmask,lmask;
953   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
954   const PetscInt    *aj, *ii,*ridx;
955   PetscScalar       *aa;
956 
957   PetscFunctionBegin;
958   /* Create SF where leaves are input rows and roots are owned rows */
959   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
960   for (r = 0; r < n; ++r) lrows[r] = -1;
961   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
962   for (r = 0; r < N; ++r) {
963     const PetscInt idx   = rows[r];
964     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
965     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
966       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
967     }
968     rrows[r].rank  = p;
969     rrows[r].index = rows[r] - owners[p];
970   }
971   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
972   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
973   /* Collect flags for rows to be zeroed */
974   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
975   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
976   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
977   /* Compress and put in row numbers */
978   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
979   /* zero diagonal part of matrix */
980   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
981   /* handle off diagonal part of matrix */
982   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
983   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
984   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
985   for (i=0; i<len; i++) bb[lrows[i]] = 1;
986   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
987   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
990   if (x && b) { /* this code is buggy when the row and column layout don't match */
991     PetscBool cong;
992 
993     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
994     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
995     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
998     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
999   }
1000   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1001   /* remove zeroed rows of off diagonal matrix */
1002   ii = aij->i;
1003   for (i=0; i<len; i++) {
1004     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1005   }
1006   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1007   if (aij->compressedrow.use) {
1008     m    = aij->compressedrow.nrows;
1009     ii   = aij->compressedrow.i;
1010     ridx = aij->compressedrow.rindex;
1011     for (i=0; i<m; i++) {
1012       n  = ii[i+1] - ii[i];
1013       aj = aij->j + ii[i];
1014       aa = aij->a + ii[i];
1015 
1016       for (j=0; j<n; j++) {
1017         if (PetscAbsScalar(mask[*aj])) {
1018           if (b) bb[*ridx] -= *aa*xx[*aj];
1019           *aa = 0.0;
1020         }
1021         aa++;
1022         aj++;
1023       }
1024       ridx++;
1025     }
1026   } else { /* do not use compressed row format */
1027     m = l->B->rmap->n;
1028     for (i=0; i<m; i++) {
1029       n  = ii[i+1] - ii[i];
1030       aj = aij->j + ii[i];
1031       aa = aij->a + ii[i];
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[i] -= *aa*xx[*aj];
1035           *aa = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040     }
1041   }
1042   if (x && b) {
1043     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1044     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1045   }
1046   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1047   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1048   ierr = PetscFree(lrows);CHKERRQ(ierr);
1049 
1050   /* only change matrix nonzero state if pattern was allowed to be changed */
1051   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1052     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1053     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1059 {
1060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       nt;
1063   VecScatter     Mvctx = a->Mvctx;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1067   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1068 
1069   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1071   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1072   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1077 {
1078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089   PetscErrorCode ierr;
1090   VecScatter     Mvctx = a->Mvctx;
1091 
1092   PetscFunctionBegin;
1093   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1094   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1095   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1096   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1097   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   /* do nondiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1109   /* do local part */
1110   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1111   /* add partial results together */
1112   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1118 {
1119   MPI_Comm       comm;
1120   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1121   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1122   IS             Me,Notme;
1123   PetscErrorCode ierr;
1124   PetscInt       M,N,first,last,*notme,i;
1125   PetscBool      lf;
1126   PetscMPIInt    size;
1127 
1128   PetscFunctionBegin;
1129   /* Easy test: symmetric diagonal block */
1130   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1131   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1132   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1133   if (!*f) PetscFunctionReturn(0);
1134   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1135   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1136   if (size == 1) PetscFunctionReturn(0);
1137 
1138   /* Hard test: off-diagonal block. This takes a call to MatCreateSubMatrices(). */
1139   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1140   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1141   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1142   for (i=0; i<first; i++) notme[i] = i;
1143   for (i=last; i<M; i++) notme[i-last+first] = i;
1144   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1145   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1146   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1147   Aoff = Aoffs[0];
1148   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1149   Boff = Boffs[0];
1150   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1151   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1152   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1153   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1154   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1155   ierr = PetscFree(notme);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1160 {
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1169 {
1170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   /* do nondiagonal part */
1175   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1176   /* do local part */
1177   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1178   /* add partial results together */
1179   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 /*
1185   This only works correctly for square matrices where the subblock A->A is the
1186    diagonal block
1187 */
1188 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1189 {
1190   PetscErrorCode ierr;
1191   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1192 
1193   PetscFunctionBegin;
1194   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1195   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1196   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1207   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1212 {
1213   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1214   PetscErrorCode ierr;
1215 
1216   PetscFunctionBegin;
1217 #if defined(PETSC_USE_LOG)
1218   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1219 #endif
1220   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1221   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1222   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1223   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1224 #if defined(PETSC_USE_CTABLE)
1225   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1226 #else
1227   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1230   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1231   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1232   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1233   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1234   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1235   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1236 
1237   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1246 #if defined(PETSC_HAVE_ELEMENTAL)
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1248 #endif
1249 #if defined(PETSC_HAVE_HYPRE)
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1252 #endif
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1259 {
1260   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1261   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1262   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1263   PetscErrorCode ierr;
1264   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1265   int            fd;
1266   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1267   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1268   PetscScalar    *column_values;
1269   PetscInt       message_count,flowcontrolcount;
1270   FILE           *file;
1271 
1272   PetscFunctionBegin;
1273   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1274   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1275   nz   = A->nz + B->nz;
1276   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1277   if (!rank) {
1278     header[0] = MAT_FILE_CLASSID;
1279     header[1] = mat->rmap->N;
1280     header[2] = mat->cmap->N;
1281 
1282     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     /* get largest number of rows any processor has */
1285     rlen  = mat->rmap->n;
1286     range = mat->rmap->range;
1287     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1288   } else {
1289     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1290     rlen = mat->rmap->n;
1291   }
1292 
1293   /* load up the local row counts */
1294   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1295   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1296 
1297   /* store the row lengths to the file */
1298   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1299   if (!rank) {
1300     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1301     for (i=1; i<size; i++) {
1302       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1303       rlen = range[i+1] - range[i];
1304       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1305       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1306     }
1307     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1308   } else {
1309     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1310     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1311     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1312   }
1313   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1314 
1315   /* load up the local column indices */
1316   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1317   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1319   cnt   = 0;
1320   for (i=0; i<mat->rmap->n; i++) {
1321     for (j=B->i[i]; j<B->i[i+1]; j++) {
1322       if ((col = garray[B->j[j]]) > cstart) break;
1323       column_indices[cnt++] = col;
1324     }
1325     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1326     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1327   }
1328   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1329 
1330   /* store the column indices to the file */
1331   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1332   if (!rank) {
1333     MPI_Status status;
1334     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1337       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1338       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1339       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1341     }
1342     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1343   } else {
1344     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1345     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1346     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1348   }
1349   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1350 
1351   /* load up the local column values */
1352   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if (garray[B->j[j]] > cstart) break;
1357       column_values[cnt++] = B->a[j];
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1360     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1361   }
1362   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1363 
1364   /* store the column values to the file */
1365   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1366   if (!rank) {
1367     MPI_Status status;
1368     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1369     for (i=1; i<size; i++) {
1370       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1371       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1372       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1373       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1374       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1375     }
1376     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1377   } else {
1378     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1379     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1380     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1381     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1382   }
1383   ierr = PetscFree(column_values);CHKERRQ(ierr);
1384 
1385   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1386   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1387   PetscFunctionReturn(0);
1388 }
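
/*
   A minimal sketch (not taken from this file) of reading back a matrix written by the
   binary viewer above; the file name "amat.dat" is hypothetical and error checking is
   omitted:

     Mat         A;
     PetscViewer fd;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"amat.dat",FILE_MODE_READ,&fd);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,fd);
     PetscViewerDestroy(&fd);
*/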
1389 
1390 #include <petscdraw.h>
1391 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1392 {
1393   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1394   PetscErrorCode    ierr;
1395   PetscMPIInt       rank = aij->rank,size = aij->size;
1396   PetscBool         isdraw,iascii,isbinary;
1397   PetscViewer       sviewer;
1398   PetscViewerFormat format;
1399 
1400   PetscFunctionBegin;
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1404   if (iascii) {
1405     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1406     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1407       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1408       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1409       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410       for (i=0; i<(PetscInt)size; i++) {
1411         nmax = PetscMax(nmax,nz[i]);
1412         nmin = PetscMin(nmin,nz[i]);
1413         navg += nz[i];
1414       }
1415       ierr = PetscFree(nz);CHKERRQ(ierr);
1416       navg = navg/size;
1417       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1418       PetscFunctionReturn(0);
1419     }
1420     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1421     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1422       MatInfo   info;
1423       PetscBool inodes;
1424 
1425       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1426       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1427       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1429       if (!inodes) {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       } else {
1433         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1434                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1435       }
1436       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1437       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1438       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1440       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1442       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1443       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1446       PetscInt inodecount,inodelimit,*inodes;
1447       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1448       if (inodes) {
1449         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1450       } else {
1451         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1452       }
1453       PetscFunctionReturn(0);
1454     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1455       PetscFunctionReturn(0);
1456     }
1457   } else if (isbinary) {
1458     if (size == 1) {
1459       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1460       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1463     }
1464     PetscFunctionReturn(0);
1465   } else if (iascii && size == 1) {
1466     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1467     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1468     PetscFunctionReturn(0);
1469   } else if (isdraw) {
1470     PetscDraw draw;
1471     PetscBool isnull;
1472     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1473     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1474     if (isnull) PetscFunctionReturn(0);
1475   }
1476 
1477   { /* assemble the entire matrix onto first processor */
1478     Mat A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1484     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1485 /*  The commented-out code below is an alternative that uses MatCreateSubMatrices() instead */
1486 /*
1487     Mat *AA, A = NULL, Av;
1488     IS  isrow,iscol;
1489 
1490     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1491     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1492     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1493     if (!rank) {
1494        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1495        A    = AA[0];
1496        Av   = AA[0];
1497     }
1498     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1499 */
1500     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1501     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1502     /*
1503        All processes must participate in the call that draws the matrix since the graphics
1504        waits are synchronized across all processes that share the PetscDraw object
1505     */
1506     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1507     if (!rank) {
1508       if (((PetscObject)mat)->name) {
1509         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1510       }
1511       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1512     }
1513     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1514     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1515     ierr = MatDestroy(&A);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1521 {
1522   PetscErrorCode ierr;
1523   PetscBool      iascii,isdraw,issocket,isbinary;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1527   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1528   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1529   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1530   if (iascii || isdraw || isbinary || issocket) {
1531     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1537 {
1538   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1539   PetscErrorCode ierr;
1540   Vec            bb1 = 0;
1541   PetscBool      hasop;
1542 
1543   PetscFunctionBegin;
1544   if (flag == SOR_APPLY_UPPER) {
1545     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1546     PetscFunctionReturn(0);
1547   }
1548 
1549   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1550     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1551   }
1552 
1553   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1554     if (flag & SOR_ZERO_INITIAL_GUESS) {
1555       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1556       its--;
1557     }
1558 
1559     while (its--) {
1560       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1561       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1562 
1563       /* update rhs: bb1 = bb - B*x */
1564       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1565       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1566 
1567       /* local sweep */
1568       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1569     }
1570   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1571     if (flag & SOR_ZERO_INITIAL_GUESS) {
1572       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1573       its--;
1574     }
1575     while (its--) {
1576       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578 
1579       /* update rhs: bb1 = bb - B*x */
1580       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1581       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1582 
1583       /* local sweep */
1584       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1585     }
1586   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1587     if (flag & SOR_ZERO_INITIAL_GUESS) {
1588       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1589       its--;
1590     }
1591     while (its--) {
1592       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1594 
1595       /* update rhs: bb1 = bb - B*x */
1596       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1597       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1598 
1599       /* local sweep */
1600       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1601     }
1602   } else if (flag & SOR_EISENSTAT) {
1603     Vec xx1;
1604 
1605     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1607 
1608     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1610     if (!mat->diag) {
1611       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1612       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1613     }
1614     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1615     if (hasop) {
1616       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1617     } else {
1618       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1619     }
1620     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1621 
1622     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1623 
1624     /* local sweep */
1625     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1626     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1627     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1628   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1629 
1630   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1631 
1632   matin->factorerrortype = mat->A->factorerrortype;
1633   PetscFunctionReturn(0);
1634 }
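
/*
   MatSOR_MPIAIJ() is normally reached through PCSOR (-pc_type sor) rather than called
   directly. As a hedged sketch, with hypothetical vectors b and x and error checking
   omitted, a direct call performing one processor-local symmetric sweep with omega = 1
   and no diagonal shift would look like:

     MatSOR(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,1,x);
*/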
1635 
1636 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1637 {
1638   Mat            aA,aB,Aperm;
1639   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1640   PetscScalar    *aa,*ba;
1641   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1642   PetscSF        rowsf,sf;
1643   IS             parcolp = NULL;
1644   PetscBool      done;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1649   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1650   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1651   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1652 
1653   /* Invert row permutation to find out where my rows should go */
1654   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1655   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1656   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1658   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1660 
1661   /* Invert column permutation to find out where my columns should go */
1662   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1663   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1664   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1665   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1666   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1667   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1668   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1669 
1670   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1671   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1672   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1673 
1674   /* Find out where my gcols should go */
1675   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1676   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1678   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1679   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1680   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1681   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1682   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1683 
1684   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1685   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1686   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) {
1688     PetscInt row = rdest[i],rowner;
1689     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1690     for (j=ai[i]; j<ai[i+1]; j++) {
1691       PetscInt cowner,col = cdest[aj[j]];
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696     for (j=bi[i]; j<bi[i+1]; j++) {
1697       PetscInt cowner,col = gcdest[bj[j]];
1698       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1699       if (rowner == cowner) dnnz[i]++;
1700       else onnz[i]++;
1701     }
1702   }
1703   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1704   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1705   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1706   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1707   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1708 
1709   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1710   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1711   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1712   for (i=0; i<m; i++) {
1713     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1714     PetscInt j0,rowlen;
1715     rowlen = ai[i+1] - ai[i];
1716     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m, so insert in batches */
1717       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1718       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1719     }
1720     rowlen = bi[i+1] - bi[i];
1721     for (j0=j=0; j<rowlen; j0=j) {
1722       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1723       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1724     }
1725   }
1726   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1728   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1729   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1730   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1731   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1732   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1733   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1734   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1735   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1736   *B = Aperm;
1737   PetscFunctionReturn(0);
1738 }
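
/*
   A minimal usage sketch for the routine above (hypothetical index sets rowperm and
   colperm describing the row and column permutations; error checking omitted):

     Mat Aperm;
     MatPermute(A,rowperm,colperm,&Aperm);
     ...
     MatDestroy(&Aperm);
*/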
1739 
1740 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1741 {
1742   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1747   if (ghosts) *ghosts = aij->garray;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   Mat            A    = mat->A,B = mat->B;
1755   PetscErrorCode ierr;
1756   PetscReal      isend[5],irecv[5];
1757 
1758   PetscFunctionBegin;
1759   info->block_size = 1.0;
1760   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1761 
1762   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1763   isend[3] = info->memory;  isend[4] = info->mallocs;
1764 
1765   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1766 
1767   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1768   isend[3] += info->memory;  isend[4] += info->mallocs;
1769   if (flag == MAT_LOCAL) {
1770     info->nz_used      = isend[0];
1771     info->nz_allocated = isend[1];
1772     info->nz_unneeded  = isend[2];
1773     info->memory       = isend[3];
1774     info->mallocs      = isend[4];
1775   } else if (flag == MAT_GLOBAL_MAX) {
1776     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1777 
1778     info->nz_used      = irecv[0];
1779     info->nz_allocated = irecv[1];
1780     info->nz_unneeded  = irecv[2];
1781     info->memory       = irecv[3];
1782     info->mallocs      = irecv[4];
1783   } else if (flag == MAT_GLOBAL_SUM) {
1784     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1785 
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   }
1792   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1793   info->fill_ratio_needed = 0;
1794   info->factor_mallocs    = 0;
1795   PetscFunctionReturn(0);
1796 }
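
/*
   A small usage sketch for the routine above, reached through the public MatGetInfo()
   interface (error checking omitted):

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"global nonzeros used: %g\n",info.nz_used);
*/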
1797 
1798 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1799 {
1800   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   switch (op) {
1805   case MAT_NEW_NONZERO_LOCATIONS:
1806   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1807   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1808   case MAT_KEEP_NONZERO_PATTERN:
1809   case MAT_NEW_NONZERO_LOCATION_ERR:
1810   case MAT_USE_INODES:
1811   case MAT_IGNORE_ZERO_ENTRIES:
1812     MatCheckPreallocated(A,1);
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_ROW_ORIENTED:
1817     MatCheckPreallocated(A,1);
1818     a->roworiented = flg;
1819 
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1822     break;
1823   case MAT_NEW_DIAGONALS:
1824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1825     break;
1826   case MAT_IGNORE_OFF_PROC_ENTRIES:
1827     a->donotstash = flg;
1828     break;
1829   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1830   case MAT_SPD:
1831   case MAT_SYMMETRIC:
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833   case MAT_HERMITIAN:
1834   case MAT_SYMMETRY_ETERNAL:
1835     break;
1836   case MAT_SUBMAT_SINGLEIS:
1837     A->submat_singleis = flg;
1838     break;
1839   case MAT_STRUCTURE_ONLY:
1840     /* The option is handled directly by MatSetOption() */
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
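
/*
   For example (a hedged sketch; error checking omitted), the MAT_IGNORE_OFF_PROC_ENTRIES
   case above is what makes the following call drop, rather than stash and communicate,
   any values a process sets in rows it does not own:

     MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
*/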
1847 
1848 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1849 {
1850   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1851   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1852   PetscErrorCode ierr;
1853   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1854   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1855   PetscInt       *cmap,*idx_p;
1856 
1857   PetscFunctionBegin;
1858   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1859   mat->getrowactive = PETSC_TRUE;
1860 
1861   if (!mat->rowvalues && (idx || v)) {
1862     /*
1863         allocate enough space to hold information from the longest row.
1864     */
1865     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1866     PetscInt   max = 1,tmp;
1867     for (i=0; i<matin->rmap->n; i++) {
1868       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1869       if (max < tmp) max = tmp;
1870     }
1871     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1872   }
1873 
1874   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1875   lrow = row - rstart;
1876 
1877   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1878   if (!v)   {pvA = 0; pvB = 0;}
1879   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1880   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1881   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1882   nztot = nzA + nzB;
1883 
1884   cmap = mat->garray;
1885   if (v  || idx) {
1886     if (nztot) {
1887       /* Sort by increasing column numbers, assuming A and B already sorted */
1888       PetscInt imark = -1;
1889       if (v) {
1890         *v = v_p = mat->rowvalues;
1891         for (i=0; i<nzB; i++) {
1892           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1893           else break;
1894         }
1895         imark = i;
1896         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1897         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1898       }
1899       if (idx) {
1900         *idx = idx_p = mat->rowindices;
1901         if (imark > -1) {
1902           for (i=0; i<imark; i++) {
1903             idx_p[i] = cmap[cworkB[i]];
1904           }
1905         } else {
1906           for (i=0; i<nzB; i++) {
1907             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1908             else break;
1909           }
1910           imark = i;
1911         }
1912         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1913         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1914       }
1915     } else {
1916       if (idx) *idx = 0;
1917       if (v)   *v   = 0;
1918     }
1919   }
1920   *nz  = nztot;
1921   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1922   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
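
/*
   A minimal usage sketch for the routine above, through the public MatGetRow() interface;
   the row must be locally owned, as checked above (error checking omitted):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     MatGetRow(A,row,&ncols,&cols,&vals);
     ... cols[] holds global column indices in increasing order ...
     MatRestoreRow(A,row,&ncols,&cols,&vals);
*/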
1925 
1926 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1927 {
1928   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1929 
1930   PetscFunctionBegin;
1931   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1932   aij->getrowactive = PETSC_FALSE;
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1937 {
1938   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1939   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       i,j,cstart = mat->cmap->rstart;
1942   PetscReal      sum = 0.0;
1943   MatScalar      *v;
1944 
1945   PetscFunctionBegin;
1946   if (aij->size == 1) {
1947     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1948   } else {
1949     if (type == NORM_FROBENIUS) {
1950       v = amat->a;
1951       for (i=0; i<amat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       v = bmat->a;
1955       for (i=0; i<bmat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1961     } else if (type == NORM_1) { /* max column norm */
1962       PetscReal *tmp,*tmp2;
1963       PetscInt  *jj,*garray = aij->garray;
1964       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1965       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v     = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1982     } else if (type == NORM_INFINITY) { /* max row norm */
1983       PetscReal ntemp = 0.0;
1984       for (j=0; j<aij->A->rmap->n; j++) {
1985         v   = amat->a + amat->i[j];
1986         sum = 0.0;
1987         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         v = bmat->a + bmat->i[j];
1991         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         if (sum > ntemp) ntemp = sum;
1995       }
1996       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1997       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1998     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1999   }
2000   PetscFunctionReturn(0);
2001 }
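
/*
   For reference, the norms computed above (restating the standard definitions in terms of
   the local diagonal block A and off-diagonal block B on each process):

     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)  - local sums of squares, reduced with MPIU_SUM
     NORM_1:         max_j sum_i |a_ij|     - column sums reduced with MPIU_SUM, then max
     NORM_INFINITY:  max_i sum_j |a_ij|     - local row sums, reduced with MPIU_MAX
*/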
2002 
2003 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2004 {
2005   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2006   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2007   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2008   PetscErrorCode ierr;
2009   Mat            B,A_diag,*B_diag;
2010   MatScalar      *array;
2011 
2012   PetscFunctionBegin;
2013   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2014   ai = Aloc->i; aj = Aloc->j;
2015   bi = Bloc->i; bj = Bloc->j;
2016   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2017     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2018     PetscSFNode          *oloc;
2019     PETSC_UNUSED PetscSF sf;
2020 
2021     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2022     /* compute d_nnz for preallocation */
2023     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2024     for (i=0; i<ai[ma]; i++) {
2025       d_nnz[aj[i]]++;
2026     }
2027     /* compute local off-diagonal contributions */
2028     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2029     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2030     /* map those to global */
2031     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2032     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2033     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2034     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2035     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2036     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2037     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2038 
2039     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2040     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2041     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2042     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2043     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2044     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2045   } else {
2046     B    = *matout;
2047     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2048   }
2049 
2050   b           = (Mat_MPIAIJ*)B->data;
2051   A_diag      = a->A;
2052   B_diag      = &b->A;
2053   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2054   A_diag_ncol = A_diag->cmap->N;
2055   B_diag_ilen = sub_B_diag->ilen;
2056   B_diag_i    = sub_B_diag->i;
2057 
2058   /* Set ilen for diagonal of B */
2059   for (i=0; i<A_diag_ncol; i++) {
2060     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2061   }
2062 
2063   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2064      very quickly (i.e., without using MatSetValues) because all writes are local. */
2065   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2066 
2067   /* copy over the B part */
2068   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2069   array = Bloc->a;
2070   row   = A->rmap->rstart;
2071   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2072   cols_tmp = cols;
2073   for (i=0; i<mb; i++) {
2074     ncol = bi[i+1]-bi[i];
2075     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2076     row++;
2077     array += ncol; cols_tmp += ncol;
2078   }
2079   ierr = PetscFree(cols);CHKERRQ(ierr);
2080 
2081   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2083   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2084     *matout = B;
2085   } else {
2086     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2087   }
2088   PetscFunctionReturn(0);
2089 }
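
/*
   A minimal usage sketch for the routine above, through the public MatTranspose()
   interface (error checking omitted):

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);
     ... later calls may reuse the same nonzero pattern ...
     MatTranspose(A,MAT_REUSE_MATRIX,&At);
     MatDestroy(&At);
*/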
2090 
2091 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2092 {
2093   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2094   Mat            a    = aij->A,b = aij->B;
2095   PetscErrorCode ierr;
2096   PetscInt       s1,s2,s3;
2097 
2098   PetscFunctionBegin;
2099   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2100   if (rr) {
2101     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2102     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2103     /* Overlap communication with computation. */
2104     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2105   }
2106   if (ll) {
2107     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2108     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2109     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2110   }
2111   /* scale the diagonal block */
2112   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2113 
2114   if (rr) {
2115     /* Do a scatter end and then right scale the off-diagonal block */
2116     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2117     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2123 {
2124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2125   PetscErrorCode ierr;
2126 
2127   PetscFunctionBegin;
2128   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2152 {
2153   PetscErrorCode ierr;
2154   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2155   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2156 
2157   PetscFunctionBegin;
2158   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2159   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2160     /* Because of the column compression in the off-process part of the matrix a->B,
2161        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2162        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2163        could be provided by first uncompressing the a->B matrices and then copying the
2164        submatrices */
2165     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2166   } else {
2167     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2168     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2169   }
2170   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2175 {
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 /*
2184    Computes the number of nonzeros per row needed for preallocation when X and Y
2185    have different nonzero structure.
2186 */
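/*
   As a small illustration (not taken from a test): if, in the local-to-global numbering,
   row i of X has nonzeros in columns {1,4,7} and row i of Y has nonzeros in columns {4,5},
   the merged pattern is {1,4,5,7}, so nnz[i] = 4.
*/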
2187 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2188 {
2189   PetscInt       i,j,k,nzx,nzy;
2190 
2191   PetscFunctionBegin;
2192   /* Set the number of nonzeros in the new matrix */
2193   for (i=0; i<m; i++) {
2194     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2195     nzx = xi[i+1] - xi[i];
2196     nzy = yi[i+1] - yi[i];
2197     nnz[i] = 0;
2198     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2199       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2200       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2201       nnz[i]++;
2202     }
2203     for (; k<nzy; k++) nnz[i]++;
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2209 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2210 {
2211   PetscErrorCode ierr;
2212   PetscInt       m = Y->rmap->N;
2213   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2214   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2222 {
2223   PetscErrorCode ierr;
2224   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2225   PetscBLASInt   bnz,one=1;
2226   Mat_SeqAIJ     *x,*y;
2227 
2228   PetscFunctionBegin;
2229   if (str == SAME_NONZERO_PATTERN) {
2230     PetscScalar alpha = a;
2231     x    = (Mat_SeqAIJ*)xx->A->data;
2232     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2233     y    = (Mat_SeqAIJ*)yy->A->data;
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     x    = (Mat_SeqAIJ*)xx->B->data;
2236     y    = (Mat_SeqAIJ*)yy->B->data;
2237     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2238     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2239     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2240   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2241     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2242   } else {
2243     Mat      B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
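
/*
   A minimal usage sketch for the routine above, through the public MatAXPY() interface
   (error checking omitted). The MatStructure argument selects the branch taken above:

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);       - direct BLAS axpy on the stored values
     MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);     - MatAXPY_Basic() path
     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);  - Y is rebuilt with the merged pattern
*/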
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2266 {
2267 #if defined(PETSC_USE_COMPLEX)
2268   PetscErrorCode ierr;
2269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2273   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2274 #else
2275   PetscFunctionBegin;
2276 #endif
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2298   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2303 {
2304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode ierr;
2306   PetscInt       i,*idxb = 0;
2307   PetscScalar    *va,*vb;
2308   Vec            vtmp;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2312   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2313   if (idx) {
2314     for (i=0; i<A->rmap->n; i++) {
2315       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2316     }
2317   }
2318 
2319   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2320   if (idx) {
2321     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2322   }
2323   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2324   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2325 
2326   for (i=0; i<A->rmap->n; i++) {
2327     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2328       va[i] = vb[i];
2329       if (idx) idx[i] = a->garray[idxb[i]];
2330     }
2331   }
2332 
2333   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2334   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2335   ierr = PetscFree(idxb);CHKERRQ(ierr);
2336   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->rmap->n; i++) {
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++) {
2365     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2379 {
2380   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2381   PetscInt       n      = A->rmap->n;
2382   PetscInt       cstart = A->cmap->rstart;
2383   PetscInt       *cmap  = mat->garray;
2384   PetscInt       *diagIdx, *offdiagIdx;
2385   Vec            diagV, offdiagV;
2386   PetscScalar    *a, *diagA, *offdiagA;
2387   PetscInt       r;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2394   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2395   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   for (r = 0; r < n; ++r) {
2400     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2401       a[r]   = diagA[r];
2402       idx[r] = cstart + diagIdx[r];
2403     } else {
2404       a[r]   = offdiagA[r];
2405       idx[r] = cmap[offdiagIdx[r]];
2406     }
2407   }
2408   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2409   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2411   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2412   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2413   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2418 {
2419   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2420   PetscInt       n      = A->rmap->n;
2421   PetscInt       cstart = A->cmap->rstart;
2422   PetscInt       *cmap  = mat->garray;
2423   PetscInt       *diagIdx, *offdiagIdx;
2424   Vec            diagV, offdiagV;
2425   PetscScalar    *a, *diagA, *offdiagA;
2426   PetscInt       r;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2432   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2433   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2434   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2436   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2437   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2438   for (r = 0; r < n; ++r) {
2439     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2440       a[r]   = diagA[r];
2441       idx[r] = cstart + diagIdx[r];
2442     } else {
2443       a[r]   = offdiagA[r];
2444       idx[r] = cmap[offdiagIdx[r]];
2445     }
2446   }
2447   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2449   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2450   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2451   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2452   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2457 {
2458   PetscErrorCode ierr;
2459   Mat            *dummy;
2460 
2461   PetscFunctionBegin;
2462   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2463   *newmat = *dummy;
2464   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2475   A->factorerrortype = a->A->factorerrortype;
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2480 {
2481   PetscErrorCode ierr;
2482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2483 
2484   PetscFunctionBegin;
2485   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2486   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2487   if (x->assembled) {
2488     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2489   } else {
2490     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2491   }
2492   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2493   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2498 {
2499   PetscFunctionBegin;
2500   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2501   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 /*@
2506    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2507 
2508    Collective on Mat
2509 
2510    Input Parameters:
2511 +    A - the matrix
2512 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is not to use it)
2513 
2514    Level: advanced
2515 
2516 @*/
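
/*
   A minimal usage sketch (error checking omitted); the same setting is also reachable
   from the options database via -mat_increase_overlap_scalable, see
   MatSetFromOptions_MPIAIJ() below:

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
*/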
2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2518 {
2519   PetscErrorCode       ierr;
2520 
2521   PetscFunctionBegin;
2522   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2527 {
2528   PetscErrorCode       ierr;
2529   PetscBool            sc = PETSC_FALSE,flg;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2533   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2534   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2535   if (flg) {
2536     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2537   }
2538   ierr = PetscOptionsTail();CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2543 {
2544   PetscErrorCode ierr;
2545   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2546   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2547 
2548   PetscFunctionBegin;
2549   if (!Y->preallocated) {
2550     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2551   } else if (!aij->nz) {
2552     PetscInt nonew = aij->nonew;
2553     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2554     aij->nonew = nonew;
2555   }
2556   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2561 {
2562   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2567   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2568   if (d) {
2569     PetscInt rstart;
2570     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2571     *d += rstart;
2572 
2573   }
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2578 {
2579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2580   PetscErrorCode ierr;
2581 
2582   PetscFunctionBegin;
2583   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 /* -------------------------------------------------------------------*/
2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2589                                        MatGetRow_MPIAIJ,
2590                                        MatRestoreRow_MPIAIJ,
2591                                        MatMult_MPIAIJ,
2592                                 /* 4*/ MatMultAdd_MPIAIJ,
2593                                        MatMultTranspose_MPIAIJ,
2594                                        MatMultTransposeAdd_MPIAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*10*/ 0,
2599                                        0,
2600                                        0,
2601                                        MatSOR_MPIAIJ,
2602                                        MatTranspose_MPIAIJ,
2603                                 /*15*/ MatGetInfo_MPIAIJ,
2604                                        MatEqual_MPIAIJ,
2605                                        MatGetDiagonal_MPIAIJ,
2606                                        MatDiagonalScale_MPIAIJ,
2607                                        MatNorm_MPIAIJ,
2608                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2609                                        MatAssemblyEnd_MPIAIJ,
2610                                        MatSetOption_MPIAIJ,
2611                                        MatZeroEntries_MPIAIJ,
2612                                 /*24*/ MatZeroRows_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*29*/ MatSetUp_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                        MatGetDiagonalBlock_MPIAIJ,
2621                                        0,
2622                                 /*34*/ MatDuplicate_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*39*/ MatAXPY_MPIAIJ,
2628                                        MatCreateSubMatrices_MPIAIJ,
2629                                        MatIncreaseOverlap_MPIAIJ,
2630                                        MatGetValues_MPIAIJ,
2631                                        MatCopy_MPIAIJ,
2632                                 /*44*/ MatGetRowMax_MPIAIJ,
2633                                        MatScale_MPIAIJ,
2634                                        MatShift_MPIAIJ,
2635                                        MatDiagonalSet_MPIAIJ,
2636                                        MatZeroRowsColumns_MPIAIJ,
2637                                 /*49*/ MatSetRandom_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2643                                        0,
2644                                        MatSetUnfactored_MPIAIJ,
2645                                        MatPermute_MPIAIJ,
2646                                        0,
2647                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2648                                        MatDestroy_MPIAIJ,
2649                                        MatView_MPIAIJ,
2650                                        0,
2651                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2652                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2653                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2658                                        MatGetRowMinAbs_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*75*/ MatFDColoringApply_AIJ,
2664                                        MatSetFromOptions_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        MatFindZeroDiagonals_MPIAIJ,
2668                                 /*80*/ 0,
2669                                        0,
2670                                        0,
2671                                 /*83*/ MatLoad_MPIAIJ,
2672                                        MatIsSymmetric_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2678                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2679                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2680                                        MatPtAP_MPIAIJ_MPIAIJ,
2681                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2682                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*99*/ 0,
2688                                        0,
2689                                        0,
2690                                        MatConjugate_MPIAIJ,
2691                                        0,
2692                                 /*104*/MatSetValuesRow_MPIAIJ,
2693                                        MatRealPart_MPIAIJ,
2694                                        MatImaginaryPart_MPIAIJ,
2695                                        0,
2696                                        0,
2697                                 /*109*/0,
2698                                        0,
2699                                        MatGetRowMin_MPIAIJ,
2700                                        0,
2701                                        MatMissingDiagonal_MPIAIJ,
2702                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2703                                        0,
2704                                        MatGetGhosts_MPIAIJ,
2705                                        0,
2706                                        0,
2707                                 /*119*/0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatGetMultiProcBlock_MPIAIJ,
2712                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2713                                        MatGetColumnNorms_MPIAIJ,
2714                                        MatInvertBlockDiagonal_MPIAIJ,
2715                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2716                                        MatCreateSubMatricesMPI_MPIAIJ,
2717                                 /*129*/0,
2718                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2719                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2720                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                 /*134*/0,
2723                                        0,
2724                                        MatRARt_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                        0,
2727                                 /*139*/MatSetBlockSizes_MPIAIJ,
2728                                        0,
2729                                        0,
2730                                        MatFDColoringSetUp_MPIXAIJ,
2731                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2732                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2733 };
2734 
2735 /* ----------------------------------------------------------------------------------------*/
2736 
2737 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2738 {
2739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2744   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2749 {
2750   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2755   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2756   PetscFunctionReturn(0);
2757 }
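
/*
   Hedged usage sketch (illustrative): saving and later restoring the numerical
   values of an assembled matrix whose nonzero structure does not change
   between uses.

      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ... operations that overwrite the values of mat ...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   MatStoreValues() expects MAT_NEW_NONZERO_LOCATIONS to have been set to
   PETSC_FALSE so the stored structure stays valid.
*/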
2758 
2759 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763   PetscMPIInt    size;
2764 
2765   PetscFunctionBegin;
2766   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2768   b = (Mat_MPIAIJ*)B->data;
2769 
2770 #if defined(PETSC_USE_CTABLE)
2771   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2772 #else
2773   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2774 #endif
2775   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2776   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2777   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2778 
2779   /* Because B may have been resized we simply destroy it and create a new one each time */
2780   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2781   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2782   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2783   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2784   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2785   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2786   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2787 
2788   if (!B->preallocated) {
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2791     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2792     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2793     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2794   }
2795 
2796   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2797   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2798   B->preallocated  = PETSC_TRUE;
2799   B->was_assembled = PETSC_FALSE;
2800   B->assembled     = PETSC_FALSE;
2801   PetscFunctionReturn(0);
2802 }
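
/*
   Hedged caller-side sketch (sizes are illustrative, not from this file): the
   usual sequence that ends up in MatMPIAIJSetPreallocation_MPIAIJ() above,
   preallocating at most 5 nonzeros per row in the diagonal block and 2 in the
   off-diagonal block.

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/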
2803 
2804 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2805 {
2806   Mat_MPIAIJ     *b;
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2811   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2812   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2813   b = (Mat_MPIAIJ*)B->data;
2814 
2815 #if defined(PETSC_USE_CTABLE)
2816   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2817 #else
2818   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2819 #endif
2820   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2821   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2822   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2823 
2824   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2825   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2826   B->preallocated  = PETSC_TRUE;
2827   B->was_assembled = PETSC_FALSE;
2828   B->assembled = PETSC_FALSE;
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2833 {
2834   Mat            mat;
2835   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   *newmat = 0;
2840   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2841   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2842   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2843   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2844   a       = (Mat_MPIAIJ*)mat->data;
2845 
2846   mat->factortype   = matin->factortype;
2847   mat->assembled    = PETSC_TRUE;
2848   mat->insertmode   = NOT_SET_VALUES;
2849   mat->preallocated = PETSC_TRUE;
2850 
2851   a->size         = oldmat->size;
2852   a->rank         = oldmat->rank;
2853   a->donotstash   = oldmat->donotstash;
2854   a->roworiented  = oldmat->roworiented;
2855   a->rowindices   = 0;
2856   a->rowvalues    = 0;
2857   a->getrowactive = PETSC_FALSE;
2858 
2859   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2860   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2861 
2862   if (oldmat->colmap) {
2863 #if defined(PETSC_USE_CTABLE)
2864     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2865 #else
2866     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2867     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2868     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2869 #endif
2870   } else a->colmap = 0;
2871   if (oldmat->garray) {
2872     PetscInt len;
2873     len  = oldmat->B->cmap->n;
2874     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2875     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2876     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2877   } else a->garray = 0;
2878 
2879   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2880   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2881   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2883 
2884   if (oldmat->Mvctx_mpi1) {
2885     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2886     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2887   }
2888 
2889   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2890   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2891   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2892   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2893   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2894   *newmat = mat;
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2899 {
2900   PetscBool      isbinary, ishdf5;
2901   PetscErrorCode ierr;
2902 
2903   PetscFunctionBegin;
2904   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2905   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2906   /* force binary viewer to load .info file if it has not yet done so */
2907   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2908   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2909   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2910   if (isbinary) {
2911     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2912   } else if (ishdf5) {
2913 #if defined(PETSC_HAVE_HDF5)
2914     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2915 #else
2916     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2917 #endif
2918   } else {
2919     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2920   }
2921   PetscFunctionReturn(0);
2922 }
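
/*
   Hedged usage sketch (file name is illustrative): loading a MATMPIAIJ matrix
   from a PETSc binary file, which dispatches to MatLoad_MPIAIJ_Binary() below.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/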
2923 
2924 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2925 {
2926   PetscScalar    *vals,*svals;
2927   MPI_Comm       comm;
2928   PetscErrorCode ierr;
2929   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2930   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2931   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2932   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2933   PetscInt       cend,cstart,n,*rowners;
2934   int            fd;
2935   PetscInt       bs = newMat->rmap->bs;
2936 
2937   PetscFunctionBegin;
2938   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2939   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2940   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2941   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2942   if (!rank) {
2943     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2944     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2945     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2946   }
2947 
2948   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2949   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2950   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2951   if (bs < 0) bs = 1;
2952 
2953   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2954   M    = header[1]; N = header[2];
2955 
2956   /* If global sizes are set, check if they are consistent with that given in the file */
2957   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2958   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2959 
2960   /* determine ownership of all (block) rows */
2961   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2962   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2963   else m = newMat->rmap->n; /* Set by user */
2964 
2965   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2966   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2967 
2968   /* First process needs enough room for process with most rows */
2969   if (!rank) {
2970     mmax = rowners[1];
2971     for (i=2; i<=size; i++) {
2972       mmax = PetscMax(mmax, rowners[i]);
2973     }
2974   } else mmax = -1;             /* unused, but compilers complain */
2975 
2976   rowners[0] = 0;
2977   for (i=2; i<=size; i++) {
2978     rowners[i] += rowners[i-1];
2979   }
2980   rstart = rowners[rank];
2981   rend   = rowners[rank+1];
2982 
2983   /* distribute row lengths to all processors */
2984   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2985   if (!rank) {
2986     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
2987     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2988     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2989     for (j=0; j<m; j++) {
2990       procsnz[0] += ourlens[j];
2991     }
2992     for (i=1; i<size; i++) {
2993       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
2994       /* calculate the number of nonzeros on each processor */
2995       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2996         procsnz[i] += rowlengths[j];
2997       }
2998       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2999     }
3000     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3001   } else {
3002     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3003   }
3004 
3005   if (!rank) {
3006     /* determine max buffer needed and allocate it */
3007     maxnz = 0;
3008     for (i=0; i<size; i++) {
3009       maxnz = PetscMax(maxnz,procsnz[i]);
3010     }
3011     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3012 
3013     /* read in my part of the matrix column indices  */
3014     nz   = procsnz[0];
3015     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3016     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3017 
3018     /* read in everyone else's column indices and ship them off */
3019     for (i=1; i<size; i++) {
3020       nz   = procsnz[i];
3021       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3022       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3023     }
3024     ierr = PetscFree(cols);CHKERRQ(ierr);
3025   } else {
3026     /* determine buffer space needed for message */
3027     nz = 0;
3028     for (i=0; i<m; i++) {
3029       nz += ourlens[i];
3030     }
3031     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3032 
3033     /* receive message of column indices */
3034     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3035   }
3036 
3037   /* determine column ownership if matrix is not square */
3038   if (N != M) {
3039     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3040     else n = newMat->cmap->n;
3041     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3042     cstart = cend - n;
3043   } else {
3044     cstart = rstart;
3045     cend   = rend;
3046     n      = cend - cstart;
3047   }
3048 
3049   /* loop over local rows, determining number of off diagonal entries */
3050   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3051   jj   = 0;
3052   for (i=0; i<m; i++) {
3053     for (j=0; j<ourlens[i]; j++) {
3054       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3055       jj++;
3056     }
3057   }
3058 
3059   for (i=0; i<m; i++) {
3060     ourlens[i] -= offlens[i];
3061   }
3062   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3063 
3064   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3065 
3066   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3067 
3068   for (i=0; i<m; i++) {
3069     ourlens[i] += offlens[i];
3070   }
3071 
3072   if (!rank) {
3073     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3074 
3075     /* read in my part of the matrix numerical values  */
3076     nz   = procsnz[0];
3077     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3078 
3079     /* insert into matrix */
3080     jj      = rstart;
3081     smycols = mycols;
3082     svals   = vals;
3083     for (i=0; i<m; i++) {
3084       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3085       smycols += ourlens[i];
3086       svals   += ourlens[i];
3087       jj++;
3088     }
3089 
3090     /* read in other processors and ship out */
3091     for (i=1; i<size; i++) {
3092       nz   = procsnz[i];
3093       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3094       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3095     }
3096     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3097   } else {
3098     /* receive numeric values */
3099     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3100 
3101     /* receive message of values */
3102     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3103 
3104     /* insert into matrix */
3105     jj      = rstart;
3106     smycols = mycols;
3107     svals   = vals;
3108     for (i=0; i<m; i++) {
3109       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3110       smycols += ourlens[i];
3111       svals   += ourlens[i];
3112       jj++;
3113     }
3114   }
3115   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3116   ierr = PetscFree(vals);CHKERRQ(ierr);
3117   ierr = PetscFree(mycols);CHKERRQ(ierr);
3118   ierr = PetscFree(rowners);CHKERRQ(ierr);
3119   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3120   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3121   PetscFunctionReturn(0);
3122 }
3123 
3124 /* Not scalable because of ISAllGather() unless getting all columns. */
3125 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3126 {
3127   PetscErrorCode ierr;
3128   IS             iscol_local;
3129   PetscBool      isstride;
3130   PetscMPIInt    lisstride=0,gisstride;
3131 
3132   PetscFunctionBegin;
3133   /* check if we are grabbing all columns */
3134   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3135 
3136   if (isstride) {
3137     PetscInt  start,len,mstart,mlen;
3138     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3139     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3140     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3141     if (mstart == start && mlen-mstart == len) lisstride = 1;
3142   }
3143 
3144   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3145   if (gisstride) {
3146     PetscInt N;
3147     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3148     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3149     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3150     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3151   } else {
3152     PetscInt cbs;
3153     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3154     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3155     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3156   }
3157 
3158   *isseq = iscol_local;
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 /*
3163  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3164  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3165 
3166  Input Parameters:
3167    mat - matrix
3168    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3169            i.e., mat->rstart <= isrow[i] < mat->rend
3170    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3171            i.e., mat->cstart <= iscol[i] < mat->cend
3172  Output Parameters:
3173    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3174    iscol_o - sequential column index set for retrieving mat->B
3175    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3176  */
3177 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3178 {
3179   PetscErrorCode ierr;
3180   Vec            x,cmap;
3181   const PetscInt *is_idx;
3182   PetscScalar    *xarray,*cmaparray;
3183   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3184   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3185   Mat            B=a->B;
3186   Vec            lvec=a->lvec,lcmap;
3187   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3188   MPI_Comm       comm;
3189   VecScatter     Mvctx=a->Mvctx;
3190 
3191   PetscFunctionBegin;
3192   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3193   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3194 
3195   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3196   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3197   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3198   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3199   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3200 
3201   /* Get start indices */
3202   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3203   isstart -= ncols;
3204   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3205 
3206   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3207   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3208   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3209   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3210   for (i=0; i<ncols; i++) {
3211     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3212     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3213     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3214   }
3215   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3216   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3217   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3218 
3219   /* Get iscol_d */
3220   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3221   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3222   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3223 
3224   /* Get isrow_d */
3225   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3226   rstart = mat->rmap->rstart;
3227   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3228   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3229   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3230   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3231 
3232   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3233   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3234   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3235 
3236   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3237   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3238   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3239 
3240   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3241 
3242   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3243   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3244 
3245   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3246   /* off-process column indices */
3247   count = 0;
3248   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3249   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3250 
3251   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3252   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3253   for (i=0; i<Bn; i++) {
3254     if (PetscRealPart(xarray[i]) > -1.0) {
3255       idx[count]     = i;                   /* local column index in off-diagonal part B */
3256       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3257       count++;
3258     }
3259   }
3260   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3261   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3262 
3263   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3264   /* cannot ensure iscol_o has same blocksize as iscol! */
3265 
3266   ierr = PetscFree(idx);CHKERRQ(ierr);
3267   *garray = cmap1;
3268 
3269   ierr = VecDestroy(&x);CHKERRQ(ierr);
3270   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3271   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3277 {
3278   PetscErrorCode ierr;
3279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3280   Mat            M = NULL;
3281   MPI_Comm       comm;
3282   IS             iscol_d,isrow_d,iscol_o;
3283   Mat            Asub = NULL,Bsub = NULL;
3284   PetscInt       n;
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3288 
3289   if (call == MAT_REUSE_MATRIX) {
3290     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3291     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3292     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3293 
3294     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3295     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3296 
3297     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3298     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3299 
3300     /* Update diagonal and off-diagonal portions of submat */
3301     asub = (Mat_MPIAIJ*)(*submat)->data;
3302     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3303     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3304     if (n) {
3305       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3306     }
3307     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3308     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3309 
3310   } else { /* call == MAT_INITIAL_MATRIX */
3311     const PetscInt *garray;
3312     PetscInt        BsubN;
3313 
3314     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3315     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3316 
3317     /* Create local submatrices Asub and Bsub */
3318     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3319     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3320 
3321     /* Create submatrix M */
3322     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3323 
3324     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3325     asub = (Mat_MPIAIJ*)M->data;
3326 
3327     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3328     n = asub->B->cmap->N;
3329     if (BsubN > n) {
3330       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3331       const PetscInt *idx;
3332       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3333       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3334 
3335       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3336       j = 0;
3337       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3338       for (i=0; i<n; i++) {
3339         if (j >= BsubN) break;
3340         while (subgarray[i] > garray[j]) j++;
3341 
3342         if (subgarray[i] == garray[j]) {
3343           idx_new[i] = idx[j++];
3344         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3345       }
3346       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3347 
3348       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3349       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3350 
3351     } else if (BsubN < n) {
3352       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3353     }
3354 
3355     ierr = PetscFree(garray);CHKERRQ(ierr);
3356     *submat = M;
3357 
3358     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3359     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3360     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3361 
3362     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3363     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3364 
3365     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3366     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3367   }
3368   PetscFunctionReturn(0);
3369 }
3370 
3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3372 {
3373   PetscErrorCode ierr;
3374   IS             iscol_local=NULL,isrow_d;
3375   PetscInt       csize;
3376   PetscInt       n,i,j,start,end;
3377   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3378   MPI_Comm       comm;
3379 
3380   PetscFunctionBegin;
3381   /* If isrow has same processor distribution as mat,
3382      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3383   if (call == MAT_REUSE_MATRIX) {
3384     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3385     if (isrow_d) {
3386       sameRowDist  = PETSC_TRUE;
3387       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3388     } else {
3389       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3390       if (iscol_local) {
3391         sameRowDist  = PETSC_TRUE;
3392         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3393       }
3394     }
3395   } else {
3396     /* Check if isrow has same processor distribution as mat */
3397     sameDist[0] = PETSC_FALSE;
3398     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3399     if (!n) {
3400       sameDist[0] = PETSC_TRUE;
3401     } else {
3402       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3403       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3404       if (i >= start && j < end) {
3405         sameDist[0] = PETSC_TRUE;
3406       }
3407     }
3408 
3409     /* Check if iscol has same processor distribution as mat */
3410     sameDist[1] = PETSC_FALSE;
3411     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3412     if (!n) {
3413       sameDist[1] = PETSC_TRUE;
3414     } else {
3415       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3416       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3417       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3418     }
3419 
3420     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3421     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3422     sameRowDist = tsameDist[0];
3423   }
3424 
3425   if (sameRowDist) {
3426     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3427       /* isrow and iscol have same processor distribution as mat */
3428       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3429       PetscFunctionReturn(0);
3430     } else { /* sameRowDist */
3431       /* isrow has same processor distribution as mat */
3432       if (call == MAT_INITIAL_MATRIX) {
3433         PetscBool sorted;
3434         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3435         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3436         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3437         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3438 
3439         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3440         if (sorted) {
3441           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3442           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3443           PetscFunctionReturn(0);
3444         }
3445       } else { /* call == MAT_REUSE_MATRIX */
3446         IS    iscol_sub;
3447         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3448         if (iscol_sub) {
3449           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3450           PetscFunctionReturn(0);
3451         }
3452       }
3453     }
3454   }
3455 
3456   /* General case: iscol -> iscol_local which has global size of iscol */
3457   if (call == MAT_REUSE_MATRIX) {
3458     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3459     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3460   } else {
3461     if (!iscol_local) {
3462       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3463     }
3464   }
3465 
3466   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3467   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3468 
3469   if (call == MAT_INITIAL_MATRIX) {
3470     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3471     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3472   }
3473   PetscFunctionReturn(0);
3474 }
3475 
3476 /*@C
3477      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3478          and "off-diagonal" part of the matrix in CSR format.
3479 
3480    Collective
3481 
3482    Input Parameters:
3483 +  comm - MPI communicator
3484 .  A - "diagonal" portion of matrix
3485 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3486 -  garray - global index of B columns
3487 
3488    Output Parameter:
3489 .   mat - the matrix, with input A as its local diagonal matrix

3490    Level: advanced
3491 
3492    Notes:
3493        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3494        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3495 
3496 .seealso: MatCreateMPIAIJWithSplitArrays()
3497 @*/
3498 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3499 {
3500   PetscErrorCode ierr;
3501   Mat_MPIAIJ     *maij;
3502   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3503   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3504   PetscScalar    *oa=b->a;
3505   Mat            Bnew;
3506   PetscInt       m,n,N;
3507 
3508   PetscFunctionBegin;
3509   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3510   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3511   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3512   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3513   /* the check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3514   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3515 
3516   /* Get global columns of mat */
3517   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3518 
3519   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3520   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3521   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3522   maij = (Mat_MPIAIJ*)(*mat)->data;
3523 
3524   (*mat)->preallocated = PETSC_TRUE;
3525 
3526   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3527   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3528 
3529   /* Set A as diagonal portion of *mat */
3530   maij->A = A;
3531 
3532   nz = oi[m];
3533   for (i=0; i<nz; i++) {
3534     col   = oj[i];
3535     oj[i] = garray[col];
3536   }
3537 
3538    /* Set Bnew as off-diagonal portion of *mat */
3539   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3540   bnew        = (Mat_SeqAIJ*)Bnew->data;
3541   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3542   maij->B     = Bnew;
3543 
3544   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3545   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3546   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3547   b->free_a       = PETSC_FALSE;
3548   b->free_ij      = PETSC_FALSE;
3549   ierr = MatDestroy(&B);CHKERRQ(ierr);
3550 
3551   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3552   bnew->free_a       = PETSC_TRUE;
3553   bnew->free_ij      = PETSC_TRUE;
3554 
3555   /* condense columns of maij->B */
3556   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3557   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3558   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3560   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
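
/*
   Hedged usage sketch (the arrays ai,aj,av,bi,bj,bv, the sizes m,n,nB and the
   garray contents are illustrative placeholders, not data from this file):
   building the per-process pieces as SeqAIJ matrices and handing them to
   MatCreateMPIAIJWithSeqAIJ(), after which Aloc and Bloc belong to C and must
   not be used or destroyed by the caller. The column indices of Bloc are
   local, with garray[j] giving the global column of local column j, as
   converted in the routine above.

      Mat Aloc,Bloc,C;
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,ai,aj,av,&Aloc);CHKERRQ(ierr);
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,nB,bi,bj,bv,&Bloc);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
*/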
3563 
3564 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3565 
3566 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3567 {
3568   PetscErrorCode ierr;
3569   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3570   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3571   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3572   Mat            M,Msub,B=a->B;
3573   MatScalar      *aa;
3574   Mat_SeqAIJ     *aij;
3575   PetscInt       *garray = a->garray,*colsub,Ncols;
3576   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3577   IS             iscol_sub,iscmap;
3578   const PetscInt *is_idx,*cmap;
3579   PetscBool      allcolumns=PETSC_FALSE;
3580   MPI_Comm       comm;
3581 
3582   PetscFunctionBegin;
3583   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3584 
3585   if (call == MAT_REUSE_MATRIX) {
3586     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3587     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3588     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3589 
3590     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3591     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3592 
3593     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3594     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3595 
3596     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3597 
3598   } else { /* call == MAT_INITIAL_MATRIX */
3599     PetscBool flg;
3600 
3601     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3602     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3603 
3604     /* (1) iscol -> nonscalable iscol_local */
3605     /* Check for special case: each processor gets entire matrix columns */
3606     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3607     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3608     if (allcolumns) {
3609       iscol_sub = iscol_local;
3610       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3611       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3612 
3613     } else {
3614       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3615       PetscInt *idx,*cmap1,k;
3616       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3617       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3618       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3619       count = 0;
3620       k     = 0;
3621       for (i=0; i<Ncols; i++) {
3622         j = is_idx[i];
3623         if (j >= cstart && j < cend) {
3624           /* diagonal part of mat */
3625           idx[count]     = j;
3626           cmap1[count++] = i; /* column index in submat */
3627         } else if (Bn) {
3628           /* off-diagonal part of mat */
3629           if (j == garray[k]) {
3630             idx[count]     = j;
3631             cmap1[count++] = i;  /* column index in submat */
3632           } else if (j > garray[k]) {
3633             while (j > garray[k] && k < Bn-1) k++;
3634             if (j == garray[k]) {
3635               idx[count]     = j;
3636               cmap1[count++] = i; /* column index in submat */
3637             }
3638           }
3639         }
3640       }
3641       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3642 
3643       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3644       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3645       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3646 
3647       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3648     }
3649 
3650     /* (3) Create sequential Msub */
3651     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3652   }
3653 
3654   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3655   aij  = (Mat_SeqAIJ*)(Msub)->data;
3656   ii   = aij->i;
3657   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3658 
3659   /*
3660       m - number of local rows
3661       Ncols - number of columns (same on all processors)
3662       rstart - first row in new global matrix generated
3663   */
3664   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3665 
3666   if (call == MAT_INITIAL_MATRIX) {
3667     /* (4) Create parallel newmat */
3668     PetscMPIInt    rank,size;
3669     PetscInt       csize;
3670 
3671     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3672     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3673 
3674     /*
3675         Determine the number of non-zeros in the diagonal and off-diagonal
3676         portions of the matrix in order to do correct preallocation
3677     */
3678 
3679     /* first get start and end of "diagonal" columns */
3680     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3681     if (csize == PETSC_DECIDE) {
3682       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3683       if (mglobal == Ncols) { /* square matrix */
3684         nlocal = m;
3685       } else {
3686         nlocal = Ncols/size + ((Ncols % size) > rank);
3687       }
3688     } else {
3689       nlocal = csize;
3690     }
3691     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3692     rstart = rend - nlocal;
3693     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3694 
3695     /* next, compute all the lengths */
3696     jj    = aij->j;
3697     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3698     olens = dlens + m;
3699     for (i=0; i<m; i++) {
3700       jend = ii[i+1] - ii[i];
3701       olen = 0;
3702       dlen = 0;
3703       for (j=0; j<jend; j++) {
3704         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3705         else dlen++;
3706         jj++;
3707       }
3708       olens[i] = olen;
3709       dlens[i] = dlen;
3710     }
3711 
3712     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3713     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3714 
3715     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3716     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3717     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3718     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3719     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3720     ierr = PetscFree(dlens);CHKERRQ(ierr);
3721 
3722   } else { /* call == MAT_REUSE_MATRIX */
3723     M    = *newmat;
3724     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3725     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3726     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3727     /*
3728          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3729        rather than the slower MatSetValues().
3730     */
3731     M->was_assembled = PETSC_TRUE;
3732     M->assembled     = PETSC_FALSE;
3733   }
3734 
3735   /* (5) Set values of Msub to *newmat */
3736   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3737   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3738 
3739   jj   = aij->j;
3740   aa   = aij->a;
3741   for (i=0; i<m; i++) {
3742     row = rstart + i;
3743     nz  = ii[i+1] - ii[i];
3744     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3745     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3746     jj += nz; aa += nz;
3747   }
3748   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3749 
3750   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3751   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752 
3753   ierr = PetscFree(colsub);CHKERRQ(ierr);
3754 
3755   /* save Msub, iscol_sub and iscmap used in processor for next request */
3756   if (call ==  MAT_INITIAL_MATRIX) {
3757     *newmat = M;
3758     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3759     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3760 
3761     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3762     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3763 
3764     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3765     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3766 
3767     if (iscol_local) {
3768       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3769       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3770     }
3771   }
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 /*
3776     Not great since it makes two copies of the submatrix: first a SeqAIJ
3777   on each process, and then the final result obtained by concatenating the local matrices.
3778   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3779 
3780   Note: This requires a sequential iscol with all indices.
3781 */
3782 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3783 {
3784   PetscErrorCode ierr;
3785   PetscMPIInt    rank,size;
3786   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3787   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3788   Mat            M,Mreuse;
3789   MatScalar      *aa,*vwork;
3790   MPI_Comm       comm;
3791   Mat_SeqAIJ     *aij;
3792   PetscBool      colflag,allcolumns=PETSC_FALSE;
3793 
3794   PetscFunctionBegin;
3795   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3796   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3798 
3799   /* Check for special case: each processor gets entire matrix columns */
3800   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3801   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3802   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3803 
3804   if (call ==  MAT_REUSE_MATRIX) {
3805     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3806     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3807     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3808   } else {
3809     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3810   }
3811 
3812   /*
3813       m - number of local rows
3814       n - number of columns (same on all processors)
3815       rstart - first row in new global matrix generated
3816   */
3817   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3818   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3819   if (call == MAT_INITIAL_MATRIX) {
3820     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3821     ii  = aij->i;
3822     jj  = aij->j;
3823 
3824     /*
3825         Determine the number of non-zeros in the diagonal and off-diagonal
3826         portions of the matrix in order to do correct preallocation
3827     */
3828 
3829     /* first get start and end of "diagonal" columns */
3830     if (csize == PETSC_DECIDE) {
3831       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3832       if (mglobal == n) { /* square matrix */
3833         nlocal = m;
3834       } else {
3835         nlocal = n/size + ((n % size) > rank);
3836       }
3837     } else {
3838       nlocal = csize;
3839     }
3840     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3841     rstart = rend - nlocal;
3842     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3843 
3844     /* next, compute all the lengths */
3845     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3846     olens = dlens + m;
3847     for (i=0; i<m; i++) {
3848       jend = ii[i+1] - ii[i];
3849       olen = 0;
3850       dlen = 0;
3851       for (j=0; j<jend; j++) {
3852         if (*jj < rstart || *jj >= rend) olen++;
3853         else dlen++;
3854         jj++;
3855       }
3856       olens[i] = olen;
3857       dlens[i] = dlen;
3858     }
3859     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3860     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3861     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3862     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3863     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3864     ierr = PetscFree(dlens);CHKERRQ(ierr);
3865   } else {
3866     PetscInt ml,nl;
3867 
3868     M    = *newmat;
3869     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3870     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3871     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3872     /*
3873          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3874        rather than the slower MatSetValues().
3875     */
3876     M->was_assembled = PETSC_TRUE;
3877     M->assembled     = PETSC_FALSE;
3878   }
3879   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3880   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3881   ii   = aij->i;
3882   jj   = aij->j;
3883   aa   = aij->a;
3884   for (i=0; i<m; i++) {
3885     row   = rstart + i;
3886     nz    = ii[i+1] - ii[i];
3887     cwork = jj;     jj += nz;
3888     vwork = aa;     aa += nz;
3889     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3890   }
3891 
3892   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3894   *newmat = M;
3895 
3896   /* save submatrix used in processor for next request */
3897   if (call ==  MAT_INITIAL_MATRIX) {
3898     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3899     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3900   }
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3905 {
3906   PetscInt       m,cstart, cend,j,nnz,i,d;
3907   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3908   const PetscInt *JJ;
3909   PetscScalar    *values;
3910   PetscErrorCode ierr;
3911   PetscBool      nooffprocentries;
3912 
3913   PetscFunctionBegin;
3914   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3915 
3916   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3917   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3918   m      = B->rmap->n;
3919   cstart = B->cmap->rstart;
3920   cend   = B->cmap->rend;
3921   rstart = B->rmap->rstart;
3922 
3923   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3924 
3925 #if defined(PETSC_USE_DEBUG)
3926   for (i=0; i<m; i++) {
3927     nnz = Ii[i+1]- Ii[i];
3928     JJ  = J + Ii[i];
3929     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3930     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3931     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3932   }
3933 #endif
3934 
3935   for (i=0; i<m; i++) {
3936     nnz     = Ii[i+1]- Ii[i];
3937     JJ      = J + Ii[i];
3938     nnz_max = PetscMax(nnz_max,nnz);
3939     d       = 0;
3940     for (j=0; j<nnz; j++) {
3941       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3942     }
3943     d_nnz[i] = d;
3944     o_nnz[i] = nnz - d;
3945   }
3946   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3947   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3948 
3949   if (v) values = (PetscScalar*)v;
3950   else {
3951     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3952   }
3953 
3954   for (i=0; i<m; i++) {
3955     ii   = i + rstart;
3956     nnz  = Ii[i+1]- Ii[i];
3957     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3958   }
3959   nooffprocentries    = B->nooffprocentries;
3960   B->nooffprocentries = PETSC_TRUE;
3961   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3962   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   B->nooffprocentries = nooffprocentries;
3964 
3965   if (!v) {
3966     ierr = PetscFree(values);CHKERRQ(ierr);
3967   }
3968   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3969   PetscFunctionReturn(0);
3970 }
3971 
3972 /*@
3973    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3974    (the default parallel PETSc format).
3975 
3976    Collective
3977 
3978    Input Parameters:
3979 +  B - the matrix
3980 .  i - the indices into j for the start of each local row (starts with zero)
3981 .  j - the column indices for each local row (starts with zero)
3982 -  v - optional values in the matrix
3983 
3984    Level: developer
3985 
3986    Notes:
3987        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3988      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3989      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3990 
3991        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3992 
3993        The format used for the sparse matrix input is equivalent to a
3994     row-major ordering, i.e. for the following matrix, the expected input data is
3995     as shown below.
3996 
3997 $        1 0 0
3998 $        2 0 3     P0
3999 $       -------
4000 $        4 5 6     P1
4001 $
4002 $     Process0 [P0]: rows_owned=[0,1]
4003 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4004 $        j =  {0,0,2}  [size = 3]
4005 $        v =  {1,2,3}  [size = 3]
4006 $
4007 $     Process1 [P1]: rows_owned=[2]
4008 $        i =  {0,3}    [size = nrow+1  = 1+1]
4009 $        j =  {0,1,2}  [size = 3]
4010 $        v =  {4,5,6}  [size = 3]
4011 
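
   As an illustrative sketch (error checking omitted, variable names are only for
   illustration), process 0 of the example above could build its part of the matrix
   as follows; process 1 would pass its own i = {0,3}, j = {0,1,2}, v = {4,5,6}:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};         /* row pointers for the 2 local rows */
     PetscInt    j[] = {0,0,2};         /* global column indices */
     PetscScalar v[] = {1.0,2.0,3.0};   /* values */

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3); /* process 0 owns rows 0 and 1 of the 3x3 matrix */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve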
4012 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4013           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4014 @*/
4015 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4016 {
4017   PetscErrorCode ierr;
4018 
4019   PetscFunctionBegin;
4020   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4021   PetscFunctionReturn(0);
4022 }
4023 
4024 /*@C
4025    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4026    (the default parallel PETSc format).  For good matrix assembly performance
4027    the user should preallocate the matrix storage by setting the parameters
4028    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4029    performance can be increased by more than a factor of 50.
4030 
4031    Collective
4032 
4033    Input Parameters:
4034 +  B - the matrix
4035 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4036            (same value is used for all local rows)
4037 .  d_nnz - array containing the number of nonzeros in the various rows of the
4038            DIAGONAL portion of the local submatrix (possibly different for each row)
4039            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4040            The size of this array is equal to the number of local rows, i.e 'm'.
4041            For matrices that will be factored, you must leave room for (and set)
4042            the diagonal entry even if it is zero.
4043 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4044            submatrix (same value is used for all local rows).
4045 -  o_nnz - array containing the number of nonzeros in the various rows of the
4046            OFF-DIAGONAL portion of the local submatrix (possibly different for
4047            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4048            structure. The size of this array is equal to the number
4049            of local rows, i.e 'm'.
4050 
4051    If the *_nnz parameter is given then the *_nz parameter is ignored
4052 
4053    The AIJ format (also called the Yale sparse matrix format or
4054    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4055    storage.  The stored row and column indices begin with zero.
4056    See Users-Manual: ch_mat for details.
4057 
4058    The parallel matrix is partitioned such that the first m0 rows belong to
4059    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4060    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
4061 
4062    The DIAGONAL portion of the local submatrix of a processor can be defined
4063    as the submatrix which is obtained by extracting the part corresponding to
4064    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4065    first row that belongs to the processor, r2 is the last row belonging to
4066    this processor, and c1-c2 is the range of indices of the local part of a
4067    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4068    common case of a square matrix, the row and column ranges are the same and
4069    the DIAGONAL part is also square. The remaining portion of the local
4070    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4071 
4072    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4073 
4074    You can call MatGetInfo() to get information on how effective the preallocation was;
4075    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4076    You can also run with the option -info and look for messages with the string
4077    malloc in them to see if additional memory allocation was needed.
4078 
4079    Example usage:
4080 
4081    Consider the following 8x8 matrix with 34 non-zero values, that is
4082    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4083    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4084    as follows:
4085 
4086 .vb
4087             1  2  0  |  0  3  0  |  0  4
4088     Proc0   0  5  6  |  7  0  0  |  8  0
4089             9  0 10  | 11  0  0  | 12  0
4090     -------------------------------------
4091            13  0 14  | 15 16 17  |  0  0
4092     Proc1   0 18  0  | 19 20 21  |  0  0
4093             0  0  0  | 22 23  0  | 24  0
4094     -------------------------------------
4095     Proc2  25 26 27  |  0  0 28  | 29  0
4096            30  0  0  | 31 32 33  |  0 34
4097 .ve
4098 
4099    This can be represented as a collection of submatrices as:
4100 
4101 .vb
4102       A B C
4103       D E F
4104       G H I
4105 .ve
4106 
4107    Where the submatrices A,B,C are owned by proc0, D,E,F are
4108    owned by proc1, G,H,I are owned by proc2.
4109 
4110    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4111    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4112    The 'M','N' parameters are 8,8, and have the same values on all procs.
4113 
4114    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4115    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4116    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4117    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4118    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4119    matrix, and [DF] as another SeqAIJ matrix.
4120 
4121    When d_nz, o_nz parameters are specified, d_nz storage elements are
4122    allocated for every row of the local diagonal submatrix, and o_nz
4123    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4124    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4125    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4126    In this case, the values of d_nz,o_nz are:
4127 .vb
4128      proc0 : dnz = 2, o_nz = 2
4129      proc1 : dnz = 3, o_nz = 2
4130      proc2 : dnz = 1, o_nz = 4
4131 .ve
4132    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4133    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4134    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4135    34 values.
4136 
4137    When d_nnz, o_nnz parameters are specified, the storage is specified
4138    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4139    In the above case the values for d_nnz,o_nnz are:
4140 .vb
4141      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4142      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4143      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4144 .ve
4145    Here the space allocated is sum of all the above values i.e 34, and
4146    hence pre-allocation is perfect.
4147 
4148    Level: intermediate
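   As a sketch (error checking omitted), the usual call sequence once per-row counts
   d_nnz and o_nnz have been computed is:

.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz); /* d_nz and o_nz are ignored when the arrays are given */
     /* then insert entries with MatSetValues() and assemble */
.ve
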
4149 
4150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4151           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4152 @*/
4153 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4154 {
4155   PetscErrorCode ierr;
4156 
4157   PetscFunctionBegin;
4158   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4159   PetscValidType(B,1);
4160   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 /*@
4165      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain the local rows
4166          in standard CSR format.
4167 
4168    Collective
4169 
4170    Input Parameters:
4171 +  comm - MPI communicator
4172 .  m - number of local rows (Cannot be PETSC_DECIDE)
4173 .  n - This value should be the same as the local size used in creating the
4174        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4175        calculated if N is given) For square matrices n is almost always m.
4176 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4177 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4178 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4179 .   j - column indices
4180 -   a - matrix values
4181 
4182    Output Parameter:
4183 .   mat - the matrix
4184 
4185    Level: intermediate
4186 
4187    Notes:
4188        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4189      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4190      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4191 
4192        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4193 
4194        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4195 
4196        The format used for the sparse matrix input is equivalent to a
4197     row-major ordering, i.e. for the following matrix, the expected input data is
4198     as shown below.
4199 
4200 $        1 0 0
4201 $        2 0 3     P0
4202 $       -------
4203 $        4 5 6     P1
4204 $
4205 $     Process0 [P0]: rows_owned=[0,1]
4206 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4207 $        j =  {0,0,2}  [size = 3]
4208 $        v =  {1,2,3}  [size = 3]
4209 $
4210 $     Process1 [P1]: rows_owned=[2]
4211 $        i =  {0,3}    [size = nrow+1  = 1+1]
4212 $        j =  {0,1,2}  [size = 3]
4213 $        v =  {4,5,6}  [size = 3]
4214 
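
   As an illustrative sketch (error checking omitted), each process passes its own
   CSR arrays from the layout above; for instance process 0 could call:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve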
4215 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4216           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4217 @*/
4218 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4219 {
4220   PetscErrorCode ierr;
4221 
4222   PetscFunctionBegin;
4223   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4224   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4225   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4226   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4227   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4228   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4229   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 /*@
4234      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain the local rows
4235          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4236 
4237    Collective
4238 
4239    Input Parameters:
4240 +  mat - the matrix
4241 .  m - number of local rows (Cannot be PETSC_DECIDE)
4242 .  n - This value should be the same as the local size used in creating the
4243        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4244        calculated if N is given) For square matrices n is almost always m.
4245 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4246 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4247 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4248 .  J - column indices
4249 -  v - matrix values
4250 
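   As a sketch (error checking omitted), to refresh the numerical values of a matrix
   previously built with MatCreateMPIAIJWithArrays(), pass the same sizes and index
   arrays and the new values:

.vb
     /* m, n, M, N, Ii, and J are unchanged from the creating call; v holds the new values */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
.ve
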
4251    Level: intermediate
4252 
4253 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4254           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4255 @*/
4256 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4257 {
4258   PetscErrorCode ierr;
4259   PetscInt       cstart,nnz,i,j;
4260   PetscInt       *ld;
4261   PetscBool      nooffprocentries;
4262   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4263   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4264   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4265   const PetscInt *Adi = Ad->i;
4266   PetscInt       ldi,Iii,md;
4267 
4268   PetscFunctionBegin;
4269   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4270   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4271   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4272   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4273 
4274   cstart = mat->cmap->rstart;
4275   if (!Aij->ld) {
4276     /* count number of entries below block diagonal */
4277     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4278     Aij->ld = ld;
4279     for (i=0; i<m; i++) {
4280       nnz  = Ii[i+1]- Ii[i];
4281       j     = 0;
4282       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before indexing J */
4283       J    += nnz;
4284       ld[i] = j;
4285     }
4286   } else {
4287     ld = Aij->ld;
4288   }
4289 
4290   for (i=0; i<m; i++) {
4291     nnz  = Ii[i+1]- Ii[i];
4292     Iii  = Ii[i];
4293     ldi  = ld[i];
4294     md   = Adi[i+1]-Adi[i];
4295     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4296     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4297     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4298     ad  += md;
4299     ao  += nnz - md;
4300   }
4301   nooffprocentries      = mat->nooffprocentries;
4302   mat->nooffprocentries = PETSC_TRUE;
4303   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4304   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4305   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4306   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4307   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4308   mat->nooffprocentries = nooffprocentries;
4309   PetscFunctionReturn(0);
4310 }
4311 
4312 /*@C
4313    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4314    (the default parallel PETSc format).  For good matrix assembly performance
4315    the user should preallocate the matrix storage by setting the parameters
4316    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4317    performance can be increased by more than a factor of 50.
4318 
4319    Collective
4320 
4321    Input Parameters:
4322 +  comm - MPI communicator
4323 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4324            This value should be the same as the local size used in creating the
4325            y vector for the matrix-vector product y = Ax.
4326 .  n - This value should be the same as the local size used in creating the
4327        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4328        calculated if N is given) For square matrices n is almost always m.
4329 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4330 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4331 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4332            (same value is used for all local rows)
4333 .  d_nnz - array containing the number of nonzeros in the various rows of the
4334            DIAGONAL portion of the local submatrix (possibly different for each row)
4335            or NULL, if d_nz is used to specify the nonzero structure.
4336            The size of this array is equal to the number of local rows, i.e 'm'.
4337 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4338            submatrix (same value is used for all local rows).
4339 -  o_nnz - array containing the number of nonzeros in the various rows of the
4340            OFF-DIAGONAL portion of the local submatrix (possibly different for
4341            each row) or NULL, if o_nz is used to specify the nonzero
4342            structure. The size of this array is equal to the number
4343            of local rows, i.e 'm'.
4344 
4345    Output Parameter:
4346 .  A - the matrix
4347 
4348    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4349    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4350    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4351 
4352    Notes:
4353    If the *_nnz parameter is given then the *_nz parameter is ignored
4354 
4355    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4356    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4357    storage requirements for this matrix.
4358 
4359    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4360    processor then it must be used on all processors that share the object for
4361    that argument.
4362 
4363    The user MUST specify either the local or global matrix dimensions
4364    (possibly both).
4365 
4366    The parallel matrix is partitioned across processors such that the
4367    first m0 rows belong to process 0, the next m1 rows belong to
4368    process 1, the next m2 rows belong to process 2 etc., where
4369    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4370    values corresponding to an [m x N] submatrix.
4371 
4372    The columns are logically partitioned with the n0 columns belonging
4373    to the 0th partition, the next n1 columns belonging to the next
4374    partition etc., where n0,n1,n2,... are the input parameter 'n'.
4375 
4376    The DIAGONAL portion of the local submatrix on any given processor
4377    is the submatrix corresponding to the rows and columns m,n
4378    corresponding to the given processor. i.e diagonal matrix on
4379    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4380    etc. The remaining portion of the local submatrix [m x (N-n)]
4381    constitute the OFF-DIAGONAL portion. The example below better
4382    illustrates this concept.
4383 
4384    For a square global matrix we define each processor's diagonal portion
4385    to be its local rows and the corresponding columns (a square submatrix);
4386    each processor's off-diagonal portion encompasses the remainder of the
4387    local matrix (a rectangular submatrix).
4388 
4389    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4390 
4391    When calling this routine with a single process communicator, a matrix of
4392    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4393    type of communicator, use the construction mechanism
4394 .vb
4395      MatCreate(...,&A);
4396      MatSetType(A,MATMPIAIJ);
4397      MatSetSizes(A, m,n,M,N);
4398      MatMPIAIJSetPreallocation(A,...);
.ve
4402 
4403    By default, this format uses inodes (identical nodes) when possible.
4404    We search for consecutive rows with the same nonzero structure, thereby
4405    reusing matrix information to achieve increased efficiency.
4406 
4407    Options Database Keys:
4408 +  -mat_no_inode  - Do not use inodes
4409 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4410 
4411 
4412 
4413    Example usage:
4414 
4415    Consider the following 8x8 matrix with 34 non-zero values, that is
4416    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4417    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4418    as follows
4419 
4420 .vb
4421             1  2  0  |  0  3  0  |  0  4
4422     Proc0   0  5  6  |  7  0  0  |  8  0
4423             9  0 10  | 11  0  0  | 12  0
4424     -------------------------------------
4425            13  0 14  | 15 16 17  |  0  0
4426     Proc1   0 18  0  | 19 20 21  |  0  0
4427             0  0  0  | 22 23  0  | 24  0
4428     -------------------------------------
4429     Proc2  25 26 27  |  0  0 28  | 29  0
4430            30  0  0  | 31 32 33  |  0 34
4431 .ve
4432 
4433    This can be represented as a collection of submatrices as
4434 
4435 .vb
4436       A B C
4437       D E F
4438       G H I
4439 .ve
4440 
4441    Where the submatrices A,B,C are owned by proc0, D,E,F are
4442    owned by proc1, G,H,I are owned by proc2.
4443 
4444    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4445    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4446    The 'M','N' parameters are 8,8, and have the same values on all procs.
4447 
4448    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4449    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4450    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4451    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4452    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4453    matrix, and [DF] as another SeqAIJ matrix.
4454 
4455    When d_nz, o_nz parameters are specified, d_nz storage elements are
4456    allocated for every row of the local diagonal submatrix, and o_nz
4457    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4458    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4459    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4460    In this case, the values of d_nz,o_nz are
4461 .vb
4462      proc0 : dnz = 2, o_nz = 2
4463      proc1 : dnz = 3, o_nz = 2
4464      proc2 : dnz = 1, o_nz = 4
4465 .ve
4466    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4467    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4468    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4469    34 values.
4470 
4471    When d_nnz, o_nnz parameters are specified, the storage is specified
4472    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4473    In the above case the values for d_nnz,o_nnz are
4474 .vb
4475      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4476      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4477      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4478 .ve
4479    Here the space allocated is sum of all the above values i.e 34, and
4480    hence pre-allocation is perfect.
4481 
4482    Level: intermediate
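   As a sketch (error checking omitted), process 0 of the example above could create
   its share of the 8x8 matrix directly with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* then insert entries with MatSetValues() and assemble */
.ve
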
4483 
4484 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4485           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4486 @*/
4487 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4488 {
4489   PetscErrorCode ierr;
4490   PetscMPIInt    size;
4491 
4492   PetscFunctionBegin;
4493   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4494   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4495   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4496   if (size > 1) {
4497     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4498     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4499   } else {
4500     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4501     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4502   }
4503   PetscFunctionReturn(0);
4504 }
4505 
4506 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4507 {
4508   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4509   PetscBool      flg;
4510   PetscErrorCode ierr;
4511 
4512   PetscFunctionBegin;
4513   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4514   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4515   if (Ad)     *Ad     = a->A;
4516   if (Ao)     *Ao     = a->B;
4517   if (colmap) *colmap = a->garray;
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4522 {
4523   PetscErrorCode ierr;
4524   PetscInt       m,N,i,rstart,nnz,Ii;
4525   PetscInt       *indx;
4526   PetscScalar    *values;
4527 
4528   PetscFunctionBegin;
4529   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4530   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4531     PetscInt       *dnz,*onz,sum,bs,cbs;
4532 
4533     if (n == PETSC_DECIDE) {
4534       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4535     }
4536     /* Check sum(n) = N */
4537     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4538     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4539 
4540     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4541     rstart -= m;
4542 
4543     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4544     for (i=0; i<m; i++) {
4545       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4546       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4547       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4548     }
4549 
4550     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4551     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4552     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4553     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4554     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4555     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4556     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4557     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4558   }
4559 
4560   /* numeric phase */
4561   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4562   for (i=0; i<m; i++) {
4563     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4564     Ii   = i + rstart;
4565     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4566     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4567   }
4568   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4569   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4570   PetscFunctionReturn(0);
4571 }
4572 
4573 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4574 {
4575   PetscErrorCode    ierr;
4576   PetscMPIInt       rank;
4577   PetscInt          m,N,i,rstart,nnz;
4578   size_t            len;
4579   const PetscInt    *indx;
4580   PetscViewer       out;
4581   char              *name;
4582   Mat               B;
4583   const PetscScalar *values;
4584 
4585   PetscFunctionBegin;
4586   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4587   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4588   /* Should this be the type of the diagonal block of A? */
4589   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4590   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4591   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4592   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4593   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4594   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4595   for (i=0; i<m; i++) {
4596     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4597     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4598     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4599   }
4600   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4601   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602 
4603   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4604   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4605   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4606   sprintf(name,"%s.%d",outfile,rank);
4607   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4608   ierr = PetscFree(name);CHKERRQ(ierr);
4609   ierr = MatView(B,out);CHKERRQ(ierr);
4610   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4611   ierr = MatDestroy(&B);CHKERRQ(ierr);
4612   PetscFunctionReturn(0);
4613 }
4614 
4615 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4616 {
4617   PetscErrorCode      ierr;
4618   Mat_Merge_SeqsToMPI *merge;
4619   PetscContainer      container;
4620 
4621   PetscFunctionBegin;
4622   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4623   if (container) {
4624     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4625     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4626     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4627     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4628     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4629     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4630     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4631     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4632     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4633     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4634     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4635     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4636     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4637     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4638     ierr = PetscFree(merge);CHKERRQ(ierr);
4639     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4640   }
4641   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4642   PetscFunctionReturn(0);
4643 }
4644 
4645 #include <../src/mat/utils/freespace.h>
4646 #include <petscbt.h>
4647 
4648 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4649 {
4650   PetscErrorCode      ierr;
4651   MPI_Comm            comm;
4652   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4653   PetscMPIInt         size,rank,taga,*len_s;
4654   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4655   PetscInt            proc,m;
4656   PetscInt            **buf_ri,**buf_rj;
4657   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4658   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4659   MPI_Request         *s_waits,*r_waits;
4660   MPI_Status          *status;
4661   MatScalar           *aa=a->a;
4662   MatScalar           **abuf_r,*ba_i;
4663   Mat_Merge_SeqsToMPI *merge;
4664   PetscContainer      container;
4665 
4666   PetscFunctionBegin;
4667   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4668   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4669 
4670   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4671   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4672 
4673   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4674   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4675 
4676   bi     = merge->bi;
4677   bj     = merge->bj;
4678   buf_ri = merge->buf_ri;
4679   buf_rj = merge->buf_rj;
4680 
4681   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4682   owners = merge->rowmap->range;
4683   len_s  = merge->len_s;
4684 
4685   /* send and recv matrix values */
4686   /*-----------------------------*/
4687   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4688   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4689 
4690   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4691   for (proc=0,k=0; proc<size; proc++) {
4692     if (!len_s[proc]) continue;
4693     i    = owners[proc];
4694     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4695     k++;
4696   }
4697 
4698   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4699   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4700   ierr = PetscFree(status);CHKERRQ(ierr);
4701 
4702   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4703   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4704 
4705   /* insert mat values of mpimat */
4706   /*----------------------------*/
4707   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4708   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4709 
4710   for (k=0; k<merge->nrecv; k++) {
4711     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4712     nrows       = *(buf_ri_k[k]);
4713     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4714     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4715   }
4716 
4717   /* set values of ba */
4718   m = merge->rowmap->n;
4719   for (i=0; i<m; i++) {
4720     arow = owners[rank] + i;
4721     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4722     bnzi = bi[i+1] - bi[i];
4723     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4724 
4725     /* add local non-zero vals of this proc's seqmat into ba */
4726     anzi   = ai[arow+1] - ai[arow];
4727     aj     = a->j + ai[arow];
4728     aa     = a->a + ai[arow];
4729     nextaj = 0;
4730     for (j=0; nextaj<anzi; j++) {
4731       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4732         ba_i[j] += aa[nextaj++];
4733       }
4734     }
4735 
4736     /* add received vals into ba */
4737     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4738       /* i-th row */
4739       if (i == *nextrow[k]) {
4740         anzi   = *(nextai[k]+1) - *nextai[k];
4741         aj     = buf_rj[k] + *(nextai[k]);
4742         aa     = abuf_r[k] + *(nextai[k]);
4743         nextaj = 0;
4744         for (j=0; nextaj<anzi; j++) {
4745           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4746             ba_i[j] += aa[nextaj++];
4747           }
4748         }
4749         nextrow[k]++; nextai[k]++;
4750       }
4751     }
4752     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4753   }
4754   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4755   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4756 
4757   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4758   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4759   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4760   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4761   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4762   PetscFunctionReturn(0);
4763 }
4764 
4765 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4766 {
4767   PetscErrorCode      ierr;
4768   Mat                 B_mpi;
4769   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4770   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4771   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4772   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4773   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4774   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4775   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4776   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4777   MPI_Status          *status;
4778   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4779   PetscBT             lnkbt;
4780   Mat_Merge_SeqsToMPI *merge;
4781   PetscContainer      container;
4782 
4783   PetscFunctionBegin;
4784   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4785 
4786   /* make sure it is a PETSc comm */
4787   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4788   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4789   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4790 
4791   ierr = PetscNew(&merge);CHKERRQ(ierr);
4792   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4793 
4794   /* determine row ownership */
4795   /*---------------------------------------------------------*/
4796   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4797   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4798   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4799   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4800   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4801   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4802   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4803 
4804   m      = merge->rowmap->n;
4805   owners = merge->rowmap->range;
4806 
4807   /* determine the number of messages to send, their lengths */
4808   /*---------------------------------------------------------*/
4809   len_s = merge->len_s;
4810 
4811   len          = 0; /* length of buf_si[] */
4812   merge->nsend = 0;
4813   for (proc=0; proc<size; proc++) {
4814     len_si[proc] = 0;
4815     if (proc == rank) {
4816       len_s[proc] = 0;
4817     } else {
4818       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4819       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4820     }
4821     if (len_s[proc]) {
4822       merge->nsend++;
4823       nrows = 0;
4824       for (i=owners[proc]; i<owners[proc+1]; i++) {
4825         if (ai[i+1] > ai[i]) nrows++;
4826       }
4827       len_si[proc] = 2*(nrows+1);
4828       len         += len_si[proc];
4829     }
4830   }
4831 
4832   /* determine the number and length of messages to receive for ij-structure */
4833   /*-------------------------------------------------------------------------*/
4834   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4835   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4836 
4837   /* post the Irecv of j-structure */
4838   /*-------------------------------*/
4839   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4840   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4841 
4842   /* post the Isend of j-structure */
4843   /*--------------------------------*/
4844   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4845 
4846   for (proc=0, k=0; proc<size; proc++) {
4847     if (!len_s[proc]) continue;
4848     i    = owners[proc];
4849     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4850     k++;
4851   }
4852 
4853   /* receives and sends of j-structure are complete */
4854   /*------------------------------------------------*/
4855   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4856   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4857 
4858   /* send and recv i-structure */
4859   /*---------------------------*/
4860   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4861   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4862 
4863   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4864   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4865   for (proc=0,k=0; proc<size; proc++) {
4866     if (!len_s[proc]) continue;
4867     /* form outgoing message for i-structure:
4868          buf_si[0]:                 nrows to be sent
4869                [1:nrows]:           row index (global)
4870                [nrows+1:2*nrows+1]: i-structure index
4871     */
4872     /*-------------------------------------------*/
4873     nrows       = len_si[proc]/2 - 1;
4874     buf_si_i    = buf_si + nrows+1;
4875     buf_si[0]   = nrows;
4876     buf_si_i[0] = 0;
4877     nrows       = 0;
4878     for (i=owners[proc]; i<owners[proc+1]; i++) {
4879       anzi = ai[i+1] - ai[i];
4880       if (anzi) {
4881         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4882         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4883         nrows++;
4884       }
4885     }
4886     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4887     k++;
4888     buf_si += len_si[proc];
4889   }
4890 
4891   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4892   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4893 
4894   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4895   for (i=0; i<merge->nrecv; i++) {
4896     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4897   }
4898 
4899   ierr = PetscFree(len_si);CHKERRQ(ierr);
4900   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4901   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4902   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4903   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4904   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4905   ierr = PetscFree(status);CHKERRQ(ierr);
4906 
4907   /* compute a local seq matrix in each processor */
4908   /*----------------------------------------------*/
4909   /* allocate bi array and free space for accumulating nonzero column info */
4910   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4911   bi[0] = 0;
4912 
4913   /* create and initialize a linked list */
4914   nlnk = N+1;
4915   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4916 
4917   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4918   len  = ai[owners[rank+1]] - ai[owners[rank]];
4919   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4920 
4921   current_space = free_space;
4922 
4923   /* determine symbolic info for each local row */
4924   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4925 
4926   for (k=0; k<merge->nrecv; k++) {
4927     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4928     nrows       = *buf_ri_k[k];
4929     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4930     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4931   }
4932 
4933   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4934   len  = 0;
4935   for (i=0; i<m; i++) {
4936     bnzi = 0;
4937     /* add local non-zero cols of this proc's seqmat into lnk */
4938     arow  = owners[rank] + i;
4939     anzi  = ai[arow+1] - ai[arow];
4940     aj    = a->j + ai[arow];
4941     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4942     bnzi += nlnk;
4943     /* add received col data into lnk */
4944     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4945       if (i == *nextrow[k]) { /* i-th row */
4946         anzi  = *(nextai[k]+1) - *nextai[k];
4947         aj    = buf_rj[k] + *nextai[k];
4948         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4949         bnzi += nlnk;
4950         nextrow[k]++; nextai[k]++;
4951       }
4952     }
4953     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4954 
4955     /* if free space is not available, make more free space */
4956     if (current_space->local_remaining<bnzi) {
4957       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4958       nspacedouble++;
4959     }
4960     /* copy data into free space, then initialize lnk */
4961     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4962     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4963 
4964     current_space->array           += bnzi;
4965     current_space->local_used      += bnzi;
4966     current_space->local_remaining -= bnzi;
4967 
4968     bi[i+1] = bi[i] + bnzi;
4969   }
4970 
4971   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4972 
4973   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4974   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4975   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4976 
4977   /* create symbolic parallel matrix B_mpi */
4978   /*---------------------------------------*/
4979   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4980   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4981   if (n==PETSC_DECIDE) {
4982     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4983   } else {
4984     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4985   }
4986   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4987   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4988   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4989   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4990   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4991 
4992   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4993   B_mpi->assembled    = PETSC_FALSE;
4994   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4995   merge->bi           = bi;
4996   merge->bj           = bj;
4997   merge->buf_ri       = buf_ri;
4998   merge->buf_rj       = buf_rj;
4999   merge->coi          = NULL;
5000   merge->coj          = NULL;
5001   merge->owners_co    = NULL;
5002 
5003   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5004 
5005   /* attach the supporting struct to B_mpi for reuse */
5006   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5007   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5008   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5009   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5010   *mpimat = B_mpi;
5011 
5012   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5013   PetscFunctionReturn(0);
5014 }
5015 
5016 /*@C
5017       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5018                  matrices from each processor
5019 
5020     Collective
5021 
5022    Input Parameters:
5023 +    comm - the communicator the parallel matrix will live on
5024 .    seqmat - the input sequential matrix on each processor
5025 .    m - number of local rows (or PETSC_DECIDE)
5026 .    n - number of local columns (or PETSC_DECIDE)
5027 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5028 
5029    Output Parameter:
5030 .    mpimat - the parallel matrix generated
5031 
5032     Level: advanced
5033 
5034    Notes:
5035      The dimensions of the sequential matrix on each processor MUST be the same.
5036      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5037      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
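
     Example usage (a minimal sketch, assuming each process already holds an assembled SeqAIJ matrix seqmat of
     identical dimensions; the second call reuses the parallel matrix after the values, but not the nonzero
     pattern, of seqmat have changed):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve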
5038 @*/
5039 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5040 {
5041   PetscErrorCode ierr;
5042   PetscMPIInt    size;
5043 
5044   PetscFunctionBegin;
5045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5046   if (size == 1) {
5047     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5048     if (scall == MAT_INITIAL_MATRIX) {
5049       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5050     } else {
5051       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5052     }
5053     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5054     PetscFunctionReturn(0);
5055   }
5056   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5057   if (scall == MAT_INITIAL_MATRIX) {
5058     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5059   }
5060   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5061   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5062   PetscFunctionReturn(0);
5063 }
5064 
5065 /*@
5066      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5067           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5068           with MatGetSize()
5069 
5070     Not Collective
5071 
5072    Input Parameters:
5073 +    A - the matrix
5074 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5075 
5076    Output Parameter:
5077 .    A_loc - the local sequential matrix generated
5078 
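    Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix; the MAT_REUSE_MATRIX call
    refreshes A_loc after the values of A change while its nonzero pattern stays the same):
.vb
    Mat A_loc;
    ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
    ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
    ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
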
5079     Level: developer
5080 
5081 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5082 
5083 @*/
5084 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5085 {
5086   PetscErrorCode ierr;
5087   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5088   Mat_SeqAIJ     *mat,*a,*b;
5089   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5090   MatScalar      *aa,*ba,*cam;
5091   PetscScalar    *ca;
5092   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5093   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5094   PetscBool      match;
5095   MPI_Comm       comm;
5096   PetscMPIInt    size;
5097 
5098   PetscFunctionBegin;
5099   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5100   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5101   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5103   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5104 
5105   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5106   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5107   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5108   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5109   aa = a->a; ba = b->a;
5110   if (scall == MAT_INITIAL_MATRIX) {
5111     if (size == 1) {
5112       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5113       PetscFunctionReturn(0);
5114     }
5115 
5116     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5117     ci[0] = 0;
5118     for (i=0; i<am; i++) {
5119       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5120     }
5121     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5122     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5123     k    = 0;
5124     for (i=0; i<am; i++) {
5125       ncols_o = bi[i+1] - bi[i];
5126       ncols_d = ai[i+1] - ai[i];
5127       /* off-diagonal portion of A: columns to the left of the diagonal block (global column < cstart) */
5128       for (jo=0; jo<ncols_o; jo++) {
5129         col = cmap[*bj];
5130         if (col >= cstart) break;
5131         cj[k]   = col; bj++;
5132         ca[k++] = *ba++;
5133       }
5134       /* diagonal portion of A */
5135       for (j=0; j<ncols_d; j++) {
5136         cj[k]   = cstart + *aj++;
5137         ca[k++] = *aa++;
5138       }
5139       /* off-diagonal portion of A: columns to the right of the diagonal block */
5140       for (j=jo; j<ncols_o; j++) {
5141         cj[k]   = cmap[*bj++];
5142         ca[k++] = *ba++;
5143       }
5144     }
5145     /* put together the new matrix */
5146     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5147     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5148     /* Since these are PETSc arrays, change flags to free them as necessary. */
5149     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5150     mat->free_a  = PETSC_TRUE;
5151     mat->free_ij = PETSC_TRUE;
5152     mat->nonew   = 0;
5153   } else if (scall == MAT_REUSE_MATRIX) {
5154     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5155     ci = mat->i; cj = mat->j; cam = mat->a;
5156     for (i=0; i<am; i++) {
5157       /* off-diagonal portion of A: columns to the left of the diagonal block (global column < cstart) */
5158       ncols_o = bi[i+1] - bi[i];
5159       for (jo=0; jo<ncols_o; jo++) {
5160         col = cmap[*bj];
5161         if (col >= cstart) break;
5162         *cam++ = *ba++; bj++;
5163       }
5164       /* diagonal portion of A */
5165       ncols_d = ai[i+1] - ai[i];
5166       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5167       /* off-diagonal portion of A: columns to the right of the diagonal block */
5168       for (j=jo; j<ncols_o; j++) {
5169         *cam++ = *ba++; bj++;
5170       }
5171     }
5172   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5173   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5174   PetscFunctionReturn(0);
5175 }
5176 
5177 /*@C
5178      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5179 
5180     Not Collective
5181 
5182    Input Parameters:
5183 +    A - the matrix
5184 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5185 -    row, col - index sets of rows and columns to extract (or NULL)
5186 
5187    Output Parameter:
5188 .    A_loc - the local sequential matrix generated
5189 
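    Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix; passing NULL for row and col
    selects all local rows and the nonzero columns):
.vb
    Mat A_loc;
    ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
    ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
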
5190     Level: developer
5191 
5192 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5193 
5194 @*/
5195 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5196 {
5197   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5198   PetscErrorCode ierr;
5199   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5200   IS             isrowa,iscola;
5201   Mat            *aloc;
5202   PetscBool      match;
5203 
5204   PetscFunctionBegin;
5205   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5206   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5207   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5208   if (!row) {
5209     start = A->rmap->rstart; end = A->rmap->rend;
5210     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5211   } else {
5212     isrowa = *row;
5213   }
5214   if (!col) {
5215     start = A->cmap->rstart;
5216     cmap  = a->garray;
5217     nzA   = a->A->cmap->n;
5218     nzB   = a->B->cmap->n;
5219     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5220     ncols = 0;
5221     for (i=0; i<nzB; i++) {
5222       if (cmap[i] < start) idx[ncols++] = cmap[i];
5223       else break;
5224     }
5225     imark = i;
5226     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5227     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5228     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5229   } else {
5230     iscola = *col;
5231   }
5232   if (scall != MAT_INITIAL_MATRIX) {
5233     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5234     aloc[0] = *A_loc;
5235   }
5236   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5237   if (!col) { /* attach global id of condensed columns */
5238     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5239   }
5240   *A_loc = aloc[0];
5241   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5242   if (!row) {
5243     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5244   }
5245   if (!col) {
5246     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5247   }
5248   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5249   PetscFunctionReturn(0);
5250 }
5251 
5252 /*@C
5253     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5254 
5255     Collective on Mat
5256 
5257    Input Parameters:
5258 +    A,B - the matrices in mpiaij format
5259 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5260 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5261 
5262    Output Parameters:
5263 +    rowb, colb - index sets of rows and columns of B to extract
5264 -    B_seq - the sequential matrix generated
5265 
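   Example usage (a minimal sketch, assuming A and B are MATMPIAIJ matrices with compatible layouts; the index
   sets and B_seq produced by the first call are passed back in with MAT_REUSE_MATRIX after the values of B change):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
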
5266     Level: developer
5267 
5268 @*/
5269 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5270 {
5271   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5272   PetscErrorCode ierr;
5273   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5274   IS             isrowb,iscolb;
5275   Mat            *bseq=NULL;
5276 
5277   PetscFunctionBegin;
5278   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5279     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5280   }
5281   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5282 
5283   if (scall == MAT_INITIAL_MATRIX) {
5284     start = A->cmap->rstart;
5285     cmap  = a->garray;
5286     nzA   = a->A->cmap->n;
5287     nzB   = a->B->cmap->n;
5288     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5289     ncols = 0;
5290     for (i=0; i<nzB; i++) {  /* row < local row index */
5291       if (cmap[i] < start) idx[ncols++] = cmap[i];
5292       else break;
5293     }
5294     imark = i;
5295     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5296     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5297     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5298     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5299   } else {
5300     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5301     isrowb  = *rowb; iscolb = *colb;
5302     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5303     bseq[0] = *B_seq;
5304   }
5305   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5306   *B_seq = bseq[0];
5307   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5308   if (!rowb) {
5309     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5310   } else {
5311     *rowb = isrowb;
5312   }
5313   if (!colb) {
5314     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5315   } else {
5316     *colb = iscolb;
5317   }
5318   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5319   PetscFunctionReturn(0);
5320 }
5321 
5322 /*
5323     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5324     of the OFF-DIAGONAL portion of local A
5325 
5326     Collective on Mat
5327 
5328    Input Parameters:
5329 +    A,B - the matrices in mpiaij format
5330 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5331 
5332    Output Parameters:
5333 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5334 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5335 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5336 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5337 
5338     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5339      for this matrix. This is not desirable.
5340 
5341     Level: developer
5342 
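    Example usage (a hedged sketch of the intended MAT_INITIAL/MAT_REUSE pattern; the caller, for instance a
    parallel matrix-matrix product implementation, is assumed to keep startsj_s, startsj_r and bufa alive between
    the two calls, and to make the second call only after the values of B change with an unchanged nonzero pattern):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);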
5343 */
5344 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5345 {
5346   PetscErrorCode         ierr;
5347   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5348   Mat_SeqAIJ             *b_oth;
5349   VecScatter             ctx;
5350   MPI_Comm               comm;
5351   const PetscMPIInt      *rprocs,*sprocs;
5352   const PetscInt         *srow,*rstarts,*sstarts;
5353   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5354   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5355   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5356   MPI_Request            *rwaits = NULL,*swaits = NULL;
5357   MPI_Status             rstatus;
5358   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5359 
5360   PetscFunctionBegin;
5361   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5362   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5363 
5364   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5365     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5366   }
5367   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5368   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5369 
5370   if (size == 1) {
5371     if (startsj_s) *startsj_s = NULL;
5372     if (bufa_ptr)  *bufa_ptr  = NULL;
5373     *B_oth    = NULL;
5374     PetscFunctionReturn(0);
5375   }
5376 
5377   ctx = a->Mvctx;
5378   tag = ((PetscObject)ctx)->tag;
5379 
5380   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5381   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5382   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5383   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5384   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5385   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5386   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5387 
5388   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5389   if (scall == MAT_INITIAL_MATRIX) {
5390     /* i-array */
5391     /*---------*/
5392     /*  post receives */
5393     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5394     for (i=0; i<nrecvs; i++) {
5395       rowlen = rvalues + rstarts[i]*rbs;
5396       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5397       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5398     }
5399 
5400     /* pack the outgoing message */
5401     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5402 
5403     sstartsj[0] = 0;
5404     rstartsj[0] = 0;
5405     len         = 0; /* total length of j or a array to be sent */
5406     if (nsends) {
5407       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5408       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5409     }
5410     for (i=0; i<nsends; i++) {
5411       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5412       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5413       for (j=0; j<nrows; j++) {
5414         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5415         for (l=0; l<sbs; l++) {
5416           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5417 
5418           rowlen[j*sbs+l] = ncols;
5419 
5420           len += ncols;
5421           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5422         }
5423         k++;
5424       }
5425       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5426 
5427       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5428     }
5429     /* recvs and sends of i-array are completed */
5430     i = nrecvs;
5431     while (i--) {
5432       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5433     }
5434     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5435     ierr = PetscFree(svalues);CHKERRQ(ierr);
5436 
5437     /* allocate buffers for sending j and a arrays */
5438     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5439     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5440 
5441     /* create i-array of B_oth */
5442     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5443 
5444     b_othi[0] = 0;
5445     len       = 0; /* total length of j or a array to be received */
5446     k         = 0;
5447     for (i=0; i<nrecvs; i++) {
5448       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5449       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5450       for (j=0; j<nrows; j++) {
5451         b_othi[k+1] = b_othi[k] + rowlen[j];
5452         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5453         k++;
5454       }
5455       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5456     }
5457     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5458 
5459     /* allocate space for j and a arrays of B_oth */
5460     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5461     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5462 
5463     /* j-array */
5464     /*---------*/
5465     /*  post receives of j-array */
5466     for (i=0; i<nrecvs; i++) {
5467       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5468       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5469     }
5470 
5471     /* pack the outgoing message j-array */
5472     if (nsends) k = sstarts[0];
5473     for (i=0; i<nsends; i++) {
5474       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5475       bufJ  = bufj+sstartsj[i];
5476       for (j=0; j<nrows; j++) {
5477         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5478         for (ll=0; ll<sbs; ll++) {
5479           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5480           for (l=0; l<ncols; l++) {
5481             *bufJ++ = cols[l];
5482           }
5483           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5484         }
5485       }
5486       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5487     }
5488 
5489     /* recvs and sends of j-array are completed */
5490     i = nrecvs;
5491     while (i--) {
5492       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5493     }
5494     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5495   } else if (scall == MAT_REUSE_MATRIX) {
5496     sstartsj = *startsj_s;
5497     rstartsj = *startsj_r;
5498     bufa     = *bufa_ptr;
5499     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5500     b_otha   = b_oth->a;
5501   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5502 
5503   /* a-array */
5504   /*---------*/
5505   /*  post receives of a-array */
5506   for (i=0; i<nrecvs; i++) {
5507     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5508     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5509   }
5510 
5511   /* pack the outgoing message a-array */
5512   if (nsends) k = sstarts[0];
5513   for (i=0; i<nsends; i++) {
5514     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5515     bufA  = bufa+sstartsj[i];
5516     for (j=0; j<nrows; j++) {
5517       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5518       for (ll=0; ll<sbs; ll++) {
5519         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5520         for (l=0; l<ncols; l++) {
5521           *bufA++ = vals[l];
5522         }
5523         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5524       }
5525     }
5526     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5527   }
5528   /* recvs and sends of a-array are completed */
5529   i = nrecvs;
5530   while (i--) {
5531     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5532   }
5533   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5534   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5535 
5536   if (scall == MAT_INITIAL_MATRIX) {
5537     /* put together the new matrix */
5538     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5539 
5540     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5541     /* Since these are PETSc arrays, change flags to free them as necessary. */
5542     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5543     b_oth->free_a  = PETSC_TRUE;
5544     b_oth->free_ij = PETSC_TRUE;
5545     b_oth->nonew   = 0;
5546 
5547     ierr = PetscFree(bufj);CHKERRQ(ierr);
5548     if (!startsj_s || !bufa_ptr) {
5549       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5550       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5551     } else {
5552       *startsj_s = sstartsj;
5553       *startsj_r = rstartsj;
5554       *bufa_ptr  = bufa;
5555     }
5556   }
5557 
5558   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5559   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5560   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5561   PetscFunctionReturn(0);
5562 }
5563 
5564 /*@C
5565   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5566 
5567   Not Collective
5568 
5569   Input Parameter:
5570 . A - The matrix in mpiaij format
5571 
5572   Output Parameters:
5573 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5574 . colmap - A map from global column index to local index into lvec
5575 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5576 
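  Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix; the returned objects are internal
  to A and must not be destroyed by the caller):
.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
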
5577   Level: developer
5578 
5579 @*/
5580 #if defined(PETSC_USE_CTABLE)
5581 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5582 #else
5583 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5584 #endif
5585 {
5586   Mat_MPIAIJ *a;
5587 
5588   PetscFunctionBegin;
5589   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5590   PetscValidPointer(lvec, 2);
5591   PetscValidPointer(colmap, 3);
5592   PetscValidPointer(multScatter, 4);
5593   a = (Mat_MPIAIJ*) A->data;
5594   if (lvec) *lvec = a->lvec;
5595   if (colmap) *colmap = a->colmap;
5596   if (multScatter) *multScatter = a->Mvctx;
5597   PetscFunctionReturn(0);
5598 }
5599 
5600 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5601 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5602 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5603 #if defined(PETSC_HAVE_MKL_SPARSE)
5604 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5605 #endif
5606 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5607 #if defined(PETSC_HAVE_ELEMENTAL)
5608 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5609 #endif
5610 #if defined(PETSC_HAVE_HYPRE)
5611 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5612 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5613 #endif
5614 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5615 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5616 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5617 
5618 /*
5619     Computes C = A*B as (B'*A')' (note A*B = ((A*B)')' = (B'*A')') since computing the product of an MPIDense and an MPIAIJ matrix directly is untenable
5620 
5621                n                       p                          p
5622         (              )       (              )         (                  )
5623       m (      A       )  *  n (       B      )   =   m (         C        )
5624         (              )       (              )         (                  )
5625 
5626 */
5627 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5628 {
5629   PetscErrorCode ierr;
5630   Mat            At,Bt,Ct;
5631 
5632   PetscFunctionBegin;
5633   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5634   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5635   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5636   ierr = MatDestroy(&At);CHKERRQ(ierr);
5637   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5638   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5639   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5640   PetscFunctionReturn(0);
5641 }
5642 
5643 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5644 {
5645   PetscErrorCode ierr;
5646   PetscInt       m=A->rmap->n,n=B->cmap->n;
5647   Mat            Cmat;
5648 
5649   PetscFunctionBegin;
5650   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5651   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5652   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5653   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5654   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5655   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5656   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5657   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658 
5659   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5660 
5661   *C = Cmat;
5662   PetscFunctionReturn(0);
5663 }
5664 
5665 /* ----------------------------------------------------------------*/
5666 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5667 {
5668   PetscErrorCode ierr;
5669 
5670   PetscFunctionBegin;
5671   if (scall == MAT_INITIAL_MATRIX) {
5672     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5673     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5674     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5675   }
5676   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5677   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5678   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5679   PetscFunctionReturn(0);
5680 }
5681 
5682 /*MC
5683    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5684 
5685    Options Database Keys:
5686 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5687 
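  Example usage (a minimal sketch; M and N stand for the desired global sizes and the preallocation counts 5 and 2
  are placeholders the user would replace with problem-specific estimates):
.vb
  Mat A;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
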
5688   Level: beginner
5689 
5690 .seealso: MatCreateAIJ()
5691 M*/
5692 
5693 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5694 {
5695   Mat_MPIAIJ     *b;
5696   PetscErrorCode ierr;
5697   PetscMPIInt    size;
5698 
5699   PetscFunctionBegin;
5700   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5701 
5702   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5703   B->data       = (void*)b;
5704   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5705   B->assembled  = PETSC_FALSE;
5706   B->insertmode = NOT_SET_VALUES;
5707   b->size       = size;
5708 
5709   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5710 
5711   /* build cache for off-processor entries formed during assembly */
5712   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5713 
5714   b->donotstash  = PETSC_FALSE;
5715   b->colmap      = 0;
5716   b->garray      = 0;
5717   b->roworiented = PETSC_TRUE;
5718 
5719   /* stuff used for matrix vector multiply */
5720   b->lvec  = NULL;
5721   b->Mvctx = NULL;
5722 
5723   /* stuff for MatGetRow() */
5724   b->rowindices   = 0;
5725   b->rowvalues    = 0;
5726   b->getrowactive = PETSC_FALSE;
5727 
5728   /* flexible pointer used in CUSP/CUSPARSE classes */
5729   b->spptr = NULL;
5730 
5731   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5732   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5733   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5734   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5735   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5736   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5737   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5738   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5739   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5740   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5741 #if defined(PETSC_HAVE_MKL_SPARSE)
5742   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5743 #endif
5744   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5745   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5746 #if defined(PETSC_HAVE_ELEMENTAL)
5747   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5748 #endif
5749 #if defined(PETSC_HAVE_HYPRE)
5750   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5751 #endif
5752   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5753   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5754   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5755   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5756   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5757 #if defined(PETSC_HAVE_HYPRE)
5758   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5759 #endif
5760   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5761   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5762   PetscFunctionReturn(0);
5763 }
5764 
5765 /*@C
5766      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5767          and "off-diagonal" parts of the matrix in CSR format.
5768 
5769    Collective
5770 
5771    Input Parameters:
5772 +  comm - MPI communicator
5773 .  m - number of local rows (Cannot be PETSC_DECIDE)
5774 .  n - number of local columns; this should be the same as the local size used in creating the
5775        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5776        calculated if N is given). For square matrices n is almost always m.
5777 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5778 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5779 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5780 .   j - column indices
5781 .   a - matrix values
5782 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5783 .   oj - column indices
5784 -   oa - matrix values
5785 
5786    Output Parameter:
5787 .   mat - the matrix
5788 
5789    Level: advanced
5790 
5791    Notes:
5792        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5793        must free the arrays once the matrix has been destroyed and not before.
5794 
5795        The i and j indices are 0-based
5796 
5797        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5798 
5799        This sets local rows and cannot be used to set off-processor values.
5800 
5801        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5802        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5803        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5804        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5805        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5806        communication if it is known that only local entries will be set.
5807 
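       Example usage (a minimal sketch, assuming i,j,a and oi,oj,oa are user-owned 0-based CSR arrays describing
       the "diagonal" and "off-diagonal" blocks of the m local rows, and that m and n are the known local sizes;
       the arrays are not copied, so they may only be freed after the matrix has been destroyed):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
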
5808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5809           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5810 @*/
5811 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5812 {
5813   PetscErrorCode ierr;
5814   Mat_MPIAIJ     *maij;
5815 
5816   PetscFunctionBegin;
5817   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5818   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5819   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5820   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5821   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5822   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5823   maij = (Mat_MPIAIJ*) (*mat)->data;
5824 
5825   (*mat)->preallocated = PETSC_TRUE;
5826 
5827   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5828   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5829 
5830   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5831   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5832 
5833   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5834   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5835   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5836   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5837 
5838   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5839   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5840   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5841   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5842   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5843   PetscFunctionReturn(0);
5844 }
5845 
5846 /*
5847     Special version for direct calls from Fortran
5848 */
5849 #include <petsc/private/fortranimpl.h>
5850 
5851 /* Change these macros so they can be used in a void function */
5852 #undef CHKERRQ
5853 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5854 #undef SETERRQ2
5855 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5856 #undef SETERRQ3
5857 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5858 #undef SETERRQ
5859 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5860 
5861 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5862 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5863 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5864 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5865 #else
5866 #endif
5867 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5868 {
5869   Mat            mat  = *mmat;
5870   PetscInt       m    = *mm, n = *mn;
5871   InsertMode     addv = *maddv;
5872   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5873   PetscScalar    value;
5874   PetscErrorCode ierr;
5875 
5876   MatCheckPreallocated(mat,1);
5877   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5878 
5879 #if defined(PETSC_USE_DEBUG)
5880   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5881 #endif
5882   {
5883     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5884     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5885     PetscBool roworiented = aij->roworiented;
5886 
5887     /* Some Variables required in the macro */
5888     Mat        A                 = aij->A;
5889     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5890     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5891     MatScalar  *aa               = a->a;
5892     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5893     Mat        B                 = aij->B;
5894     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5895     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5896     MatScalar  *ba               = b->a;
5897 
5898     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5899     PetscInt  nonew = a->nonew;
5900     MatScalar *ap1,*ap2;
5901 
5902     PetscFunctionBegin;
5903     for (i=0; i<m; i++) {
5904       if (im[i] < 0) continue;
5905 #if defined(PETSC_USE_DEBUG)
5906       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5907 #endif
5908       if (im[i] >= rstart && im[i] < rend) {
5909         row      = im[i] - rstart;
5910         lastcol1 = -1;
5911         rp1      = aj + ai[row];
5912         ap1      = aa + ai[row];
5913         rmax1    = aimax[row];
5914         nrow1    = ailen[row];
5915         low1     = 0;
5916         high1    = nrow1;
5917         lastcol2 = -1;
5918         rp2      = bj + bi[row];
5919         ap2      = ba + bi[row];
5920         rmax2    = bimax[row];
5921         nrow2    = bilen[row];
5922         low2     = 0;
5923         high2    = nrow2;
5924 
5925         for (j=0; j<n; j++) {
5926           if (roworiented) value = v[i*n+j];
5927           else value = v[i+j*m];
5928           if (in[j] >= cstart && in[j] < cend) {
5929             col = in[j] - cstart;
5930             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5931             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5932           } else if (in[j] < 0) continue;
5933 #if defined(PETSC_USE_DEBUG)
5934           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5935           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5936 #endif
5937           else {
5938             if (mat->was_assembled) {
5939               if (!aij->colmap) {
5940                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5941               }
5942 #if defined(PETSC_USE_CTABLE)
5943               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5944               col--;
5945 #else
5946               col = aij->colmap[in[j]] - 1;
5947 #endif
5948               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5949               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5950                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5951                 col  =  in[j];
5952                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5953                 B     = aij->B;
5954                 b     = (Mat_SeqAIJ*)B->data;
5955                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5956                 rp2   = bj + bi[row];
5957                 ap2   = ba + bi[row];
5958                 rmax2 = bimax[row];
5959                 nrow2 = bilen[row];
5960                 low2  = 0;
5961                 high2 = nrow2;
5962                 bm    = aij->B->rmap->n;
5963                 ba    = b->a;
5964               }
5965             } else col = in[j];
5966             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5967           }
5968         }
5969       } else if (!aij->donotstash) {
5970         if (roworiented) {
5971           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5972         } else {
5973           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5974         }
5975       }
5976     }
5977   }
5978   PetscFunctionReturnVoid();
5979 }
5980