xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ae8e66a00492136f621e81c932a55ab8c77698f3)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine number diagonal and off-diagonal counts */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine number diagonal and off-diagonal counts */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
405 a slightly higher hash table cost; without it it is not scalable (each processor
406 has an order N integer array but is fast to acess.
407 */
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 PetscErrorCode MatSetValues_MPIAIJ_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz)
636 {
637   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
638   Mat            A      = aij->A; /* diagonal part of the matrix */
639   Mat            B      = aij->B; /* offdiagonal part of the matrix */
640   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
641   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
642   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
643   PetscInt       *ailen = a->ilen,*aj = a->j;
644   PetscInt       *bilen = b->ilen,*bj = b->j;
645   PetscInt       am     = aij->A->rmap->n,i,j;
646   PetscInt       col, diag_so_far=0, offd_so_far=0;
647 
648   PetscFunctionBegin;
649   /* Iterate over all rows of the matrix */
650   for (j=0; j<am; j++) {
651     for (i=0; i<dnz[j]+onz[j]; i++) {
652       col = i + mat_i[j];
653       /* If column is in the diagonal */
654       if (mat_j[col] >= cstart && mat_j[col] < cend) {
655         aj[diag_so_far++] = mat_j[col] - cstart;
656       } else { /* off-diagonal entries */
657         bj[offd_so_far++] = mat_j[col];
658       }
659     }
660     ailen[j] = dnz[j];
661     bilen[j] = onz[j];
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 
667 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscErrorCode ierr;
671   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
672   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
673 
674   PetscFunctionBegin;
675   for (i=0; i<m; i++) {
676     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
677     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
678     if (idxm[i] >= rstart && idxm[i] < rend) {
679       row = idxm[i] - rstart;
680       for (j=0; j<n; j++) {
681         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
682         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
683         if (idxn[j] >= cstart && idxn[j] < cend) {
684           col  = idxn[j] - cstart;
685           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
686         } else {
687           if (!aij->colmap) {
688             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
689           }
690 #if defined(PETSC_USE_CTABLE)
691           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
692           col--;
693 #else
694           col = aij->colmap[idxn[j]] - 1;
695 #endif
696           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
697           else {
698             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
699           }
700         }
701       }
702     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
708 
709 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
710 {
711   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
712   PetscErrorCode ierr;
713   PetscInt       nstash,reallocs;
714 
715   PetscFunctionBegin;
716   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
717 
718   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
719   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
720   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
725 {
726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
727   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
728   PetscErrorCode ierr;
729   PetscMPIInt    n;
730   PetscInt       i,j,rstart,ncols,flg;
731   PetscInt       *row,*col;
732   PetscBool      other_disassembled;
733   PetscScalar    *val;
734 
735   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
736 
737   PetscFunctionBegin;
738   if (!aij->donotstash && !mat->nooffprocentries) {
739     while (1) {
740       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
741       if (!flg) break;
742 
743       for (i=0; i<n; ) {
744         /* Now identify the consecutive vals belonging to the same row */
745         for (j=i,rstart=row[j]; j<n; j++) {
746           if (row[j] != rstart) break;
747         }
748         if (j < n) ncols = j-i;
749         else       ncols = n-i;
750         /* Now assemble all these values with a single function call */
751         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
752 
753         i = j;
754       }
755     }
756     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
757   }
758   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
759   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
760 
761   /* determine if any processor has disassembled, if so we must
762      also disassemble ourselfs, in order that we may reassemble. */
763   /*
764      if nonzero structure of submatrix B cannot change then we know that
765      no processor disassembled thus we can skip this stuff
766   */
767   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
768     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
769     if (mat->was_assembled && !other_disassembled) {
770       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
771     }
772   }
773   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
774     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
775   }
776   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
777   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
778   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
779 
780   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
781 
782   aij->rowvalues = 0;
783 
784   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
785   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
786 
787   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
788   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
789     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
790     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
796 {
797   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
798   PetscErrorCode ierr;
799 
800   PetscFunctionBegin;
801   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
802   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
803   PetscFunctionReturn(0);
804 }
805 
806 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
807 {
808   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
809   PetscInt      *lrows;
810   PetscInt       r, len;
811   PetscBool      cong;
812   PetscErrorCode ierr;
813 
814   PetscFunctionBegin;
815   /* get locally owned rows */
816   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
817   /* fix right hand side if needed */
818   if (x && b) {
819     const PetscScalar *xx;
820     PetscScalar       *bb;
821 
822     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
823     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
824     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
825     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
826     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
827   }
828   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
829   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
830   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
831   if ((diag != 0.0) && cong) {
832     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
833   } else if (diag != 0.0) {
834     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
835     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
836     for (r = 0; r < len; ++r) {
837       const PetscInt row = lrows[r] + A->rmap->rstart;
838       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
839     }
840     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
841     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
842   } else {
843     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
844   }
845   ierr = PetscFree(lrows);CHKERRQ(ierr);
846 
847   /* only change matrix nonzero state if pattern was allowed to be changed */
848   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
849     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
850     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
856 {
857   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
858   PetscErrorCode    ierr;
859   PetscMPIInt       n = A->rmap->n;
860   PetscInt          i,j,r,m,p = 0,len = 0;
861   PetscInt          *lrows,*owners = A->rmap->range;
862   PetscSFNode       *rrows;
863   PetscSF           sf;
864   const PetscScalar *xx;
865   PetscScalar       *bb,*mask;
866   Vec               xmask,lmask;
867   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
868   const PetscInt    *aj, *ii,*ridx;
869   PetscScalar       *aa;
870 
871   PetscFunctionBegin;
872   /* Create SF where leaves are input rows and roots are owned rows */
873   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
874   for (r = 0; r < n; ++r) lrows[r] = -1;
875   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
876   for (r = 0; r < N; ++r) {
877     const PetscInt idx   = rows[r];
878     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
879     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
880       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
881     }
882     rrows[r].rank  = p;
883     rrows[r].index = rows[r] - owners[p];
884   }
885   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
886   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
887   /* Collect flags for rows to be zeroed */
888   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
889   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
890   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
891   /* Compress and put in row numbers */
892   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
893   /* zero diagonal part of matrix */
894   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
895   /* handle off diagonal part of matrix */
896   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
897   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
898   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
899   for (i=0; i<len; i++) bb[lrows[i]] = 1;
900   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
901   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
904   if (x) {
905     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
908     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
909   }
910   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
911   /* remove zeroed rows of off diagonal matrix */
912   ii = aij->i;
913   for (i=0; i<len; i++) {
914     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
915   }
916   /* loop over all elements of off process part of matrix zeroing removed columns*/
917   if (aij->compressedrow.use) {
918     m    = aij->compressedrow.nrows;
919     ii   = aij->compressedrow.i;
920     ridx = aij->compressedrow.rindex;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925 
926       for (j=0; j<n; j++) {
927         if (PetscAbsScalar(mask[*aj])) {
928           if (b) bb[*ridx] -= *aa*xx[*aj];
929           *aa = 0.0;
930         }
931         aa++;
932         aj++;
933       }
934       ridx++;
935     }
936   } else { /* do not use compressed row format */
937     m = l->B->rmap->n;
938     for (i=0; i<m; i++) {
939       n  = ii[i+1] - ii[i];
940       aj = aij->j + ii[i];
941       aa = aij->a + ii[i];
942       for (j=0; j<n; j++) {
943         if (PetscAbsScalar(mask[*aj])) {
944           if (b) bb[i] -= *aa*xx[*aj];
945           *aa = 0.0;
946         }
947         aa++;
948         aj++;
949       }
950     }
951   }
952   if (x) {
953     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
954     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
955   }
956   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
957   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
958   ierr = PetscFree(lrows);CHKERRQ(ierr);
959 
960   /* only change matrix nonzero state if pattern was allowed to be changed */
961   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
962     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
963     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
964   }
965   PetscFunctionReturn(0);
966 }
967 
968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
969 {
970   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
971   PetscErrorCode ierr;
972   PetscInt       nt;
973   VecScatter     Mvctx = a->Mvctx;
974 
975   PetscFunctionBegin;
976   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
977   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
978 
979   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
981   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
982   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
997 {
998   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
999   PetscErrorCode ierr;
1000   VecScatter     Mvctx = a->Mvctx;
1001 
1002   PetscFunctionBegin;
1003   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1004   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1014   PetscErrorCode ierr;
1015   PetscBool      merged;
1016 
1017   PetscFunctionBegin;
1018   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1019   /* do nondiagonal part */
1020   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1021   if (!merged) {
1022     /* send it on its way */
1023     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1024     /* do local part */
1025     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1026     /* receive remote parts: note this assumes the values are not actually */
1027     /* added in yy until the next line, */
1028     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1029   } else {
1030     /* do local part */
1031     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1032     /* send it on its way */
1033     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1034     /* values actually were received in the Begin() but we need to call this nop */
1035     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036   }
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1041 {
1042   MPI_Comm       comm;
1043   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1044   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1045   IS             Me,Notme;
1046   PetscErrorCode ierr;
1047   PetscInt       M,N,first,last,*notme,i;
1048   PetscMPIInt    size;
1049 
1050   PetscFunctionBegin;
1051   /* Easy test: symmetric diagonal block */
1052   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1053   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1054   if (!*f) PetscFunctionReturn(0);
1055   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1057   if (size == 1) PetscFunctionReturn(0);
1058 
1059   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1060   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1061   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1062   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1063   for (i=0; i<first; i++) notme[i] = i;
1064   for (i=last; i<M; i++) notme[i-last+first] = i;
1065   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1066   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1067   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1068   Aoff = Aoffs[0];
1069   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1070   Boff = Boffs[0];
1071   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1072   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1073   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1074   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1075   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1076   ierr = PetscFree(notme);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1081 {
1082   PetscErrorCode ierr;
1083 
1084   PetscFunctionBegin;
1085   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1097   /* send it on its way */
1098   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1099   /* do local part */
1100   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1101   /* receive remote parts */
1102   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /*
1107   This only works correctly for square matrices where the subblock A->A is the
1108    diagonal block
1109 */
1110 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1111 {
1112   PetscErrorCode ierr;
1113   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1114 
1115   PetscFunctionBegin;
1116   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1117   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1118   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1123 {
1124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1129   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1134 {
1135   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139 #if defined(PETSC_USE_LOG)
1140   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1141 #endif
1142   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1143   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1144   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1145   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1146 #if defined(PETSC_USE_CTABLE)
1147   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1148 #else
1149   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1150 #endif
1151   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1152   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1153   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1154   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1155   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1156   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1157   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1158 
1159   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1168 #if defined(PETSC_HAVE_ELEMENTAL)
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1170 #endif
1171 #if defined(PETSC_HAVE_HYPRE)
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1181 {
1182   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1183   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1184   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1185   PetscErrorCode ierr;
1186   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1187   int            fd;
1188   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1189   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1190   PetscScalar    *column_values;
1191   PetscInt       message_count,flowcontrolcount;
1192   FILE           *file;
1193 
1194   PetscFunctionBegin;
1195   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1197   nz   = A->nz + B->nz;
1198   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1199   if (!rank) {
1200     header[0] = MAT_FILE_CLASSID;
1201     header[1] = mat->rmap->N;
1202     header[2] = mat->cmap->N;
1203 
1204     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1205     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     /* get largest number of rows any processor has */
1207     rlen  = mat->rmap->n;
1208     range = mat->rmap->range;
1209     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1210   } else {
1211     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     rlen = mat->rmap->n;
1213   }
1214 
1215   /* load up the local row counts */
1216   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1217   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1218 
1219   /* store the row lengths to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       rlen = range[i+1] - range[i];
1226       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230   } else {
1231     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1232     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1234   }
1235   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1236 
1237   /* load up the local column indices */
1238   nzmax = nz; /* th processor needs space a largest processor needs */
1239   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1241   cnt   = 0;
1242   for (i=0; i<mat->rmap->n; i++) {
1243     for (j=B->i[i]; j<B->i[i+1]; j++) {
1244       if ((col = garray[B->j[j]]) > cstart) break;
1245       column_indices[cnt++] = col;
1246     }
1247     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1248     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1249   }
1250   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251 
1252   /* store the column indices to the file */
1253   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1254   if (!rank) {
1255     MPI_Status status;
1256     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1257     for (i=1; i<size; i++) {
1258       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1259       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1260       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1261       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1262       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1267     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1272 
1273   /* load up the local column values */
1274   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1275   cnt  = 0;
1276   for (i=0; i<mat->rmap->n; i++) {
1277     for (j=B->i[i]; j<B->i[i+1]; j++) {
1278       if (garray[B->j[j]] > cstart) break;
1279       column_values[cnt++] = B->a[j];
1280     }
1281     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1282     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1283   }
1284   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1285 
1286   /* store the column values to the file */
1287   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1288   if (!rank) {
1289     MPI_Status status;
1290     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1291     for (i=1; i<size; i++) {
1292       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1293       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1294       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1295       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1297     }
1298     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1299   } else {
1300     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1301     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1304   }
1305   ierr = PetscFree(column_values);CHKERRQ(ierr);
1306 
1307   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1308   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #include <petscdraw.h>
1313 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1314 {
1315   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1316   PetscErrorCode    ierr;
1317   PetscMPIInt       rank = aij->rank,size = aij->size;
1318   PetscBool         isdraw,iascii,isbinary;
1319   PetscViewer       sviewer;
1320   PetscViewerFormat format;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1326   if (iascii) {
1327     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1328     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1329       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1330       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1331       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1332       for (i=0; i<(PetscInt)size; i++) {
1333         nmax = PetscMax(nmax,nz[i]);
1334         nmin = PetscMin(nmin,nz[i]);
1335         navg += nz[i];
1336       }
1337       ierr = PetscFree(nz);CHKERRQ(ierr);
1338       navg = navg/size;
1339       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1340       PetscFunctionReturn(0);
1341     }
1342     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1343     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1344       MatInfo   info;
1345       PetscBool inodes;
1346 
1347       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1348       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1349       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1351       if (!inodes) {
1352         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1353                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1354       } else {
1355         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1356                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1357       }
1358       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1359       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1360       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1362       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1363       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1365       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1366       PetscFunctionReturn(0);
1367     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1368       PetscInt inodecount,inodelimit,*inodes;
1369       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1370       if (inodes) {
1371         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1372       } else {
1373         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1374       }
1375       PetscFunctionReturn(0);
1376     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1377       PetscFunctionReturn(0);
1378     }
1379   } else if (isbinary) {
1380     if (size == 1) {
1381       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1382       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1383     } else {
1384       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1385     }
1386     PetscFunctionReturn(0);
1387   } else if (isdraw) {
1388     PetscDraw draw;
1389     PetscBool isnull;
1390     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1391     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1392     if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Everyone has to call to draw the matrix since the graphics waits are
1444        synchronized across all processors that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1459 {
1460   PetscErrorCode ierr;
1461   PetscBool      iascii,isdraw,issocket,isbinary;
1462 
1463   PetscFunctionBegin;
1464   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1468   if (iascii || isdraw || isbinary || issocket) {
1469     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1475 {
1476   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1477   PetscErrorCode ierr;
1478   Vec            bb1 = 0;
1479   PetscBool      hasop;
1480 
1481   PetscFunctionBegin;
1482   if (flag == SOR_APPLY_UPPER) {
1483     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1484     PetscFunctionReturn(0);
1485   }
1486 
1487   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1488     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1489   }
1490 
1491   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496 
1497     while (its--) {
1498       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500 
1501       /* update rhs: bb1 = bb - B*x */
1502       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1503       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1504 
1505       /* local sweep */
1506       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1507     }
1508   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1509     if (flag & SOR_ZERO_INITIAL_GUESS) {
1510       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1511       its--;
1512     }
1513     while (its--) {
1514       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1516 
1517       /* update rhs: bb1 = bb - B*x */
1518       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1519       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1520 
1521       /* local sweep */
1522       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1523     }
1524   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1525     if (flag & SOR_ZERO_INITIAL_GUESS) {
1526       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1527       its--;
1528     }
1529     while (its--) {
1530       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1532 
1533       /* update rhs: bb1 = bb - B*x */
1534       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1535       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1536 
1537       /* local sweep */
1538       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1539     }
1540   } else if (flag & SOR_EISENSTAT) {
1541     Vec xx1;
1542 
1543     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1544     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1545 
1546     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548     if (!mat->diag) {
1549       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1550       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1551     }
1552     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1553     if (hasop) {
1554       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1555     } else {
1556       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1557     }
1558     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1559 
1560     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1561 
1562     /* local sweep */
1563     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1564     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1565     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1566   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1567 
1568   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1569 
1570   matin->factorerrortype = mat->A->factorerrortype;
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1575 {
1576   Mat            aA,aB,Aperm;
1577   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1578   PetscScalar    *aa,*ba;
1579   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1580   PetscSF        rowsf,sf;
1581   IS             parcolp = NULL;
1582   PetscBool      done;
1583   PetscErrorCode ierr;
1584 
1585   PetscFunctionBegin;
1586   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1587   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1588   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1589   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1590 
1591   /* Invert row permutation to find out where my rows should go */
1592   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1593   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1594   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1595   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1596   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1597   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1598 
1599   /* Invert column permutation to find out where my columns should go */
1600   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1601   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1602   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1603   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1604   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1605   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1606   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1607 
1608   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1609   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1610   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1611 
1612   /* Find out where my gcols should go */
1613   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1614   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1615   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1616   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1617   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1618   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1620   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1621 
1622   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1623   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1624   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1625   for (i=0; i<m; i++) {
1626     PetscInt row = rdest[i],rowner;
1627     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1628     for (j=ai[i]; j<ai[i+1]; j++) {
1629       PetscInt cowner,col = cdest[aj[j]];
1630       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1631       if (rowner == cowner) dnnz[i]++;
1632       else onnz[i]++;
1633     }
1634     for (j=bi[i]; j<bi[i+1]; j++) {
1635       PetscInt cowner,col = gcdest[bj[j]];
1636       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1637       if (rowner == cowner) dnnz[i]++;
1638       else onnz[i]++;
1639     }
1640   }
1641   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1642   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1643   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1644   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1645   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1646 
1647   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1648   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1649   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1650   for (i=0; i<m; i++) {
1651     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1652     PetscInt j0,rowlen;
1653     rowlen = ai[i+1] - ai[i];
1654     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1655       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1656       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1657     }
1658     rowlen = bi[i+1] - bi[i];
1659     for (j0=j=0; j<rowlen; j0=j) {
1660       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1661       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1662     }
1663   }
1664   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1667   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1668   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1669   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1670   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1671   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1672   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1673   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1674   *B = Aperm;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1679 {
1680   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1685   if (ghosts) *ghosts = aij->garray;
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1690 {
1691   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1692   Mat            A    = mat->A,B = mat->B;
1693   PetscErrorCode ierr;
1694   PetscReal      isend[5],irecv[5];
1695 
1696   PetscFunctionBegin;
1697   info->block_size = 1.0;
1698   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1699 
1700   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1701   isend[3] = info->memory;  isend[4] = info->mallocs;
1702 
1703   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1704 
1705   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1706   isend[3] += info->memory;  isend[4] += info->mallocs;
1707   if (flag == MAT_LOCAL) {
1708     info->nz_used      = isend[0];
1709     info->nz_allocated = isend[1];
1710     info->nz_unneeded  = isend[2];
1711     info->memory       = isend[3];
1712     info->mallocs      = isend[4];
1713   } else if (flag == MAT_GLOBAL_MAX) {
1714     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1715 
1716     info->nz_used      = irecv[0];
1717     info->nz_allocated = irecv[1];
1718     info->nz_unneeded  = irecv[2];
1719     info->memory       = irecv[3];
1720     info->mallocs      = irecv[4];
1721   } else if (flag == MAT_GLOBAL_SUM) {
1722     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1723 
1724     info->nz_used      = irecv[0];
1725     info->nz_allocated = irecv[1];
1726     info->nz_unneeded  = irecv[2];
1727     info->memory       = irecv[3];
1728     info->mallocs      = irecv[4];
1729   }
1730   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1731   info->fill_ratio_needed = 0;
1732   info->factor_mallocs    = 0;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1737 {
1738   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   switch (op) {
1743   case MAT_NEW_NONZERO_LOCATIONS:
1744   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1745   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1746   case MAT_KEEP_NONZERO_PATTERN:
1747   case MAT_NEW_NONZERO_LOCATION_ERR:
1748   case MAT_USE_INODES:
1749   case MAT_IGNORE_ZERO_ENTRIES:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_ROW_ORIENTED:
1755     MatCheckPreallocated(A,1);
1756     a->roworiented = flg;
1757 
1758     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1759     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1760     break;
1761   case MAT_NEW_DIAGONALS:
1762     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1763     break;
1764   case MAT_IGNORE_OFF_PROC_ENTRIES:
1765     a->donotstash = flg;
1766     break;
1767   case MAT_SPD:
1768     A->spd_set = PETSC_TRUE;
1769     A->spd     = flg;
1770     if (flg) {
1771       A->symmetric                  = PETSC_TRUE;
1772       A->structurally_symmetric     = PETSC_TRUE;
1773       A->symmetric_set              = PETSC_TRUE;
1774       A->structurally_symmetric_set = PETSC_TRUE;
1775     }
1776     break;
1777   case MAT_SYMMETRIC:
1778     MatCheckPreallocated(A,1);
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_STRUCTURALLY_SYMMETRIC:
1782     MatCheckPreallocated(A,1);
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_HERMITIAN:
1786     MatCheckPreallocated(A,1);
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_SYMMETRY_ETERNAL:
1790     MatCheckPreallocated(A,1);
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_SUBMAT_SINGLEIS:
1794     A->submat_singleis = flg;
1795     break;
1796   case MAT_STRUCTURE_ONLY:
1797     /* The option is handled directly by MatSetOption() */
1798     break;
1799   default:
1800     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1801   }
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1806 {
1807   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1808   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1809   PetscErrorCode ierr;
1810   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1811   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1812   PetscInt       *cmap,*idx_p;
1813 
1814   PetscFunctionBegin;
1815   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1816   mat->getrowactive = PETSC_TRUE;
1817 
1818   if (!mat->rowvalues && (idx || v)) {
1819     /*
1820         allocate enough space to hold information from the longest row.
1821     */
1822     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1823     PetscInt   max = 1,tmp;
1824     for (i=0; i<matin->rmap->n; i++) {
1825       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1826       if (max < tmp) max = tmp;
1827     }
1828     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1829   }
1830 
1831   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1832   lrow = row - rstart;
1833 
1834   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1835   if (!v)   {pvA = 0; pvB = 0;}
1836   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1837   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1838   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1839   nztot = nzA + nzB;
1840 
1841   cmap = mat->garray;
1842   if (v  || idx) {
1843     if (nztot) {
1844       /* Sort by increasing column numbers, assuming A and B already sorted */
1845       PetscInt imark = -1;
1846       if (v) {
1847         *v = v_p = mat->rowvalues;
1848         for (i=0; i<nzB; i++) {
1849           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1850           else break;
1851         }
1852         imark = i;
1853         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1854         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1855       }
1856       if (idx) {
1857         *idx = idx_p = mat->rowindices;
1858         if (imark > -1) {
1859           for (i=0; i<imark; i++) {
1860             idx_p[i] = cmap[cworkB[i]];
1861           }
1862         } else {
1863           for (i=0; i<nzB; i++) {
1864             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1865             else break;
1866           }
1867           imark = i;
1868         }
1869         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1870         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1871       }
1872     } else {
1873       if (idx) *idx = 0;
1874       if (v)   *v   = 0;
1875     }
1876   }
1877   *nz  = nztot;
1878   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1879   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1884 {
1885   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1886 
1887   PetscFunctionBegin;
1888   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1889   aij->getrowactive = PETSC_FALSE;
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1894 {
1895   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1896   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1897   PetscErrorCode ierr;
1898   PetscInt       i,j,cstart = mat->cmap->rstart;
1899   PetscReal      sum = 0.0;
1900   MatScalar      *v;
1901 
1902   PetscFunctionBegin;
1903   if (aij->size == 1) {
1904     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1905   } else {
1906     if (type == NORM_FROBENIUS) {
1907       v = amat->a;
1908       for (i=0; i<amat->nz; i++) {
1909         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1910       }
1911       v = bmat->a;
1912       for (i=0; i<bmat->nz; i++) {
1913         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1914       }
1915       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1916       *norm = PetscSqrtReal(*norm);
1917       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1918     } else if (type == NORM_1) { /* max column norm */
1919       PetscReal *tmp,*tmp2;
1920       PetscInt  *jj,*garray = aij->garray;
1921       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1922       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1923       *norm = 0.0;
1924       v     = amat->a; jj = amat->j;
1925       for (j=0; j<amat->nz; j++) {
1926         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1927       }
1928       v = bmat->a; jj = bmat->j;
1929       for (j=0; j<bmat->nz; j++) {
1930         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1931       }
1932       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1933       for (j=0; j<mat->cmap->N; j++) {
1934         if (tmp2[j] > *norm) *norm = tmp2[j];
1935       }
1936       ierr = PetscFree(tmp);CHKERRQ(ierr);
1937       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1938       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1939     } else if (type == NORM_INFINITY) { /* max row norm */
1940       PetscReal ntemp = 0.0;
1941       for (j=0; j<aij->A->rmap->n; j++) {
1942         v   = amat->a + amat->i[j];
1943         sum = 0.0;
1944         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1945           sum += PetscAbsScalar(*v); v++;
1946         }
1947         v = bmat->a + bmat->i[j];
1948         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1949           sum += PetscAbsScalar(*v); v++;
1950         }
1951         if (sum > ntemp) ntemp = sum;
1952       }
1953       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1955     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1961 {
1962   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1963   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1964   PetscErrorCode ierr;
1965   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1966   PetscInt       cstart = A->cmap->rstart,ncol;
1967   Mat            B;
1968   MatScalar      *array;
1969 
1970   PetscFunctionBegin;
1971   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1972   ai = Aloc->i; aj = Aloc->j;
1973   bi = Bloc->i; bj = Bloc->j;
1974   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1975     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1976     PetscSFNode          *oloc;
1977     PETSC_UNUSED PetscSF sf;
1978 
1979     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1980     /* compute d_nnz for preallocation */
1981     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1982     for (i=0; i<ai[ma]; i++) {
1983       d_nnz[aj[i]]++;
1984       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1985     }
1986     /* compute local off-diagonal contributions */
1987     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1989     /* map those to global */
1990     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1991     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1992     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1993     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1994     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1995     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1996     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1997 
1998     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1999     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2000     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2001     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2002     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2003     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2004   } else {
2005     B    = *matout;
2006     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2007     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2008   }
2009 
2010   /* copy over the A part */
2011   array = Aloc->a;
2012   row   = A->rmap->rstart;
2013   for (i=0; i<ma; i++) {
2014     ncol = ai[i+1]-ai[i];
2015     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2016     row++;
2017     array += ncol; aj += ncol;
2018   }
2019   aj = Aloc->j;
2020   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2021 
2022   /* copy over the B part */
2023   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2024   array = Bloc->a;
2025   row   = A->rmap->rstart;
2026   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2027   cols_tmp = cols;
2028   for (i=0; i<mb; i++) {
2029     ncol = bi[i+1]-bi[i];
2030     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2031     row++;
2032     array += ncol; cols_tmp += ncol;
2033   }
2034   ierr = PetscFree(cols);CHKERRQ(ierr);
2035 
2036   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2037   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2038   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2039     *matout = B;
2040   } else {
2041     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2047 {
2048   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2049   Mat            a    = aij->A,b = aij->B;
2050   PetscErrorCode ierr;
2051   PetscInt       s1,s2,s3;
2052 
2053   PetscFunctionBegin;
2054   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2055   if (rr) {
2056     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2057     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2058     /* Overlap communication with computation. */
2059     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2060   }
2061   if (ll) {
2062     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2063     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2064     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2065   }
2066   /* scale  the diagonal block */
2067   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2068 
2069   if (rr) {
2070     /* Do a scatter end and then right scale the off-diagonal block */
2071     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2078 {
2079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2088 {
2089   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2090   Mat            a,b,c,d;
2091   PetscBool      flg;
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   a = matA->A; b = matA->B;
2096   c = matB->A; d = matB->B;
2097 
2098   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2099   if (flg) {
2100     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2101   }
2102   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2107 {
2108   PetscErrorCode ierr;
2109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2110   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2111 
2112   PetscFunctionBegin;
2113   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2114   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2115     /* because of the column compression in the off-processor part of the matrix a->B,
2116        the number of columns in a->B and b->B may be different, hence we cannot call
2117        the MatCopy() directly on the two parts. If need be, we can provide a more
2118        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2119        then copying the submatrices */
2120     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2121   } else {
2122     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2123     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2124   }
2125   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2130 {
2131   PetscErrorCode ierr;
2132 
2133   PetscFunctionBegin;
2134   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 /*
2139    Computes the number of nonzeros per row needed for preallocation when X and Y
2140    have different nonzero structure.
2141 */
2142 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2143 {
2144   PetscInt       i,j,k,nzx,nzy;
2145 
2146   PetscFunctionBegin;
2147   /* Set the number of nonzeros in the new matrix */
2148   for (i=0; i<m; i++) {
2149     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2150     nzx = xi[i+1] - xi[i];
2151     nzy = yi[i+1] - yi[i];
2152     nnz[i] = 0;
2153     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2154       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2155       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2156       nnz[i]++;
2157     }
2158     for (; k<nzy; k++) nnz[i]++;
2159   }
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2164 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2165 {
2166   PetscErrorCode ierr;
2167   PetscInt       m = Y->rmap->N;
2168   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2169   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2177 {
2178   PetscErrorCode ierr;
2179   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2180   PetscBLASInt   bnz,one=1;
2181   Mat_SeqAIJ     *x,*y;
2182 
2183   PetscFunctionBegin;
2184   if (str == SAME_NONZERO_PATTERN) {
2185     PetscScalar alpha = a;
2186     x    = (Mat_SeqAIJ*)xx->A->data;
2187     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2188     y    = (Mat_SeqAIJ*)yy->A->data;
2189     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2190     x    = (Mat_SeqAIJ*)xx->B->data;
2191     y    = (Mat_SeqAIJ*)yy->B->data;
2192     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2193     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2194     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2195   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2196     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2197   } else {
2198     Mat      B;
2199     PetscInt *nnz_d,*nnz_o;
2200     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2201     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2202     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2203     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2204     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2205     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2206     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2207     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2209     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2210     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2211     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2212     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2219 
2220 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2221 {
2222 #if defined(PETSC_USE_COMPLEX)
2223   PetscErrorCode ierr;
2224   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2225 
2226   PetscFunctionBegin;
2227   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2228   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2229 #else
2230   PetscFunctionBegin;
2231 #endif
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2236 {
2237   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2238   PetscErrorCode ierr;
2239 
2240   PetscFunctionBegin;
2241   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2242   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2247 {
2248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2249   PetscErrorCode ierr;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2253   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2258 {
2259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2260   PetscErrorCode ierr;
2261   PetscInt       i,*idxb = 0;
2262   PetscScalar    *va,*vb;
2263   Vec            vtmp;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2267   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2268   if (idx) {
2269     for (i=0; i<A->rmap->n; i++) {
2270       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2271     }
2272   }
2273 
2274   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2275   if (idx) {
2276     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2277   }
2278   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2279   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2280 
2281   for (i=0; i<A->rmap->n; i++) {
2282     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2283       va[i] = vb[i];
2284       if (idx) idx[i] = a->garray[idxb[i]];
2285     }
2286   }
2287 
2288   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2289   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2290   ierr = PetscFree(idxb);CHKERRQ(ierr);
2291   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2296 {
2297   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2298   PetscErrorCode ierr;
2299   PetscInt       i,*idxb = 0;
2300   PetscScalar    *va,*vb;
2301   Vec            vtmp;
2302 
2303   PetscFunctionBegin;
2304   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2305   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2306   if (idx) {
2307     for (i=0; i<A->cmap->n; i++) {
2308       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2309     }
2310   }
2311 
2312   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2313   if (idx) {
2314     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2315   }
2316   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2317   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2318 
2319   for (i=0; i<A->rmap->n; i++) {
2320     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2321       va[i] = vb[i];
2322       if (idx) idx[i] = a->garray[idxb[i]];
2323     }
2324   }
2325 
2326   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2327   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2328   ierr = PetscFree(idxb);CHKERRQ(ierr);
2329   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2330   PetscFunctionReturn(0);
2331 }
2332 
2333 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2334 {
2335   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2336   PetscInt       n      = A->rmap->n;
2337   PetscInt       cstart = A->cmap->rstart;
2338   PetscInt       *cmap  = mat->garray;
2339   PetscInt       *diagIdx, *offdiagIdx;
2340   Vec            diagV, offdiagV;
2341   PetscScalar    *a, *diagA, *offdiagA;
2342   PetscInt       r;
2343   PetscErrorCode ierr;
2344 
2345   PetscFunctionBegin;
2346   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2347   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2348   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2349   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2350   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2351   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2352   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2353   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2354   for (r = 0; r < n; ++r) {
2355     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2356       a[r]   = diagA[r];
2357       idx[r] = cstart + diagIdx[r];
2358     } else {
2359       a[r]   = offdiagA[r];
2360       idx[r] = cmap[offdiagIdx[r]];
2361     }
2362   }
2363   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2364   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2365   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2366   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2367   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2368   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2373 {
2374   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2375   PetscInt       n      = A->rmap->n;
2376   PetscInt       cstart = A->cmap->rstart;
2377   PetscInt       *cmap  = mat->garray;
2378   PetscInt       *diagIdx, *offdiagIdx;
2379   Vec            diagV, offdiagV;
2380   PetscScalar    *a, *diagA, *offdiagA;
2381   PetscInt       r;
2382   PetscErrorCode ierr;
2383 
2384   PetscFunctionBegin;
2385   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2386   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2387   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2388   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2389   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2390   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2391   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2392   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2393   for (r = 0; r < n; ++r) {
2394     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2395       a[r]   = diagA[r];
2396       idx[r] = cstart + diagIdx[r];
2397     } else {
2398       a[r]   = offdiagA[r];
2399       idx[r] = cmap[offdiagIdx[r]];
2400     }
2401   }
2402   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2403   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2404   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2405   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2406   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2407   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2412 {
2413   PetscErrorCode ierr;
2414   Mat            *dummy;
2415 
2416   PetscFunctionBegin;
2417   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2418   *newmat = *dummy;
2419   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2424 {
2425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2430   A->factorerrortype = a->A->factorerrortype;
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2435 {
2436   PetscErrorCode ierr;
2437   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2438 
2439   PetscFunctionBegin;
2440   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2441   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2442   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2448 {
2449   PetscFunctionBegin;
2450   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2451   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 /*@
2456    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2457 
2458    Collective on Mat
2459 
2460    Input Parameters:
2461 +    A - the matrix
2462 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2463 
2464  Level: advanced
2465 
2466 @*/
2467 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2468 {
2469   PetscErrorCode       ierr;
2470 
2471   PetscFunctionBegin;
2472   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2477 {
2478   PetscErrorCode       ierr;
2479   PetscBool            sc = PETSC_FALSE,flg;
2480 
2481   PetscFunctionBegin;
2482   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2483   ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr);
2484   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2485   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2486   if (flg) {
2487     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2488   }
2489   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2494 {
2495   PetscErrorCode ierr;
2496   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2497   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2498 
2499   PetscFunctionBegin;
2500   if (!Y->preallocated) {
2501     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2502   } else if (!aij->nz) {
2503     PetscInt nonew = aij->nonew;
2504     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2505     aij->nonew = nonew;
2506   }
2507   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2512 {
2513   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2514   PetscErrorCode ierr;
2515 
2516   PetscFunctionBegin;
2517   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2518   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2519   if (d) {
2520     PetscInt rstart;
2521     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2522     *d += rstart;
2523 
2524   }
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 
2529 /* -------------------------------------------------------------------*/
2530 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2531                                        MatGetRow_MPIAIJ,
2532                                        MatRestoreRow_MPIAIJ,
2533                                        MatMult_MPIAIJ,
2534                                 /* 4*/ MatMultAdd_MPIAIJ,
2535                                        MatMultTranspose_MPIAIJ,
2536                                        MatMultTransposeAdd_MPIAIJ,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                 /*10*/ 0,
2541                                        0,
2542                                        0,
2543                                        MatSOR_MPIAIJ,
2544                                        MatTranspose_MPIAIJ,
2545                                 /*15*/ MatGetInfo_MPIAIJ,
2546                                        MatEqual_MPIAIJ,
2547                                        MatGetDiagonal_MPIAIJ,
2548                                        MatDiagonalScale_MPIAIJ,
2549                                        MatNorm_MPIAIJ,
2550                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2551                                        MatAssemblyEnd_MPIAIJ,
2552                                        MatSetOption_MPIAIJ,
2553                                        MatZeroEntries_MPIAIJ,
2554                                 /*24*/ MatZeroRows_MPIAIJ,
2555                                        0,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                 /*29*/ MatSetUp_MPIAIJ,
2560                                        0,
2561                                        0,
2562                                        MatGetDiagonalBlock_MPIAIJ,
2563                                        0,
2564                                 /*34*/ MatDuplicate_MPIAIJ,
2565                                        0,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                 /*39*/ MatAXPY_MPIAIJ,
2570                                        MatCreateSubMatrices_MPIAIJ,
2571                                        MatIncreaseOverlap_MPIAIJ,
2572                                        MatGetValues_MPIAIJ,
2573                                        MatCopy_MPIAIJ,
2574                                 /*44*/ MatGetRowMax_MPIAIJ,
2575                                        MatScale_MPIAIJ,
2576                                        MatShift_MPIAIJ,
2577                                        MatDiagonalSet_MPIAIJ,
2578                                        MatZeroRowsColumns_MPIAIJ,
2579                                 /*49*/ MatSetRandom_MPIAIJ,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                        0,
2584                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2585                                        0,
2586                                        MatSetUnfactored_MPIAIJ,
2587                                        MatPermute_MPIAIJ,
2588                                        0,
2589                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2590                                        MatDestroy_MPIAIJ,
2591                                        MatView_MPIAIJ,
2592                                        0,
2593                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2594                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2595                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2600                                        MatGetRowMinAbs_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*75*/ MatFDColoringApply_AIJ,
2606                                        MatSetFromOptions_MPIAIJ,
2607                                        0,
2608                                        0,
2609                                        MatFindZeroDiagonals_MPIAIJ,
2610                                 /*80*/ 0,
2611                                        0,
2612                                        0,
2613                                 /*83*/ MatLoad_MPIAIJ,
2614                                        MatIsSymmetric_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2620                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2621                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2622                                        MatPtAP_MPIAIJ_MPIAIJ,
2623                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2624                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*99*/ 0,
2630                                        0,
2631                                        0,
2632                                        MatConjugate_MPIAIJ,
2633                                        0,
2634                                 /*104*/MatSetValuesRow_MPIAIJ,
2635                                        MatRealPart_MPIAIJ,
2636                                        MatImaginaryPart_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                 /*109*/0,
2640                                        0,
2641                                        MatGetRowMin_MPIAIJ,
2642                                        0,
2643                                        MatMissingDiagonal_MPIAIJ,
2644                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2645                                        0,
2646                                        MatGetGhosts_MPIAIJ,
2647                                        0,
2648                                        0,
2649                                 /*119*/0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        MatGetMultiProcBlock_MPIAIJ,
2654                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2655                                        MatGetColumnNorms_MPIAIJ,
2656                                        MatInvertBlockDiagonal_MPIAIJ,
2657                                        0,
2658                                        MatCreateSubMatricesMPI_MPIAIJ,
2659                                 /*129*/0,
2660                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2661                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2662                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2663                                        0,
2664                                 /*134*/0,
2665                                        0,
2666                                        MatRARt_MPIAIJ_MPIAIJ,
2667                                        0,
2668                                        0,
2669                                 /*139*/MatSetBlockSizes_MPIAIJ,
2670                                        0,
2671                                        0,
2672                                        MatFDColoringSetUp_MPIXAIJ,
2673                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2674                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2675 };
2676 
2677 /* ----------------------------------------------------------------------------------------*/
2678 
2679 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2680 {
2681   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2682   PetscErrorCode ierr;
2683 
2684   PetscFunctionBegin;
2685   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2686   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2691 {
2692   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2697   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2702 {
2703   Mat_MPIAIJ     *b;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2708   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2709   b = (Mat_MPIAIJ*)B->data;
2710 
2711 #if defined(PETSC_USE_CTABLE)
2712   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2713 #else
2714   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2715 #endif
2716   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2717   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2718   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2719 
2720   /* Because the B will have been resized we simply destroy it and create a new one each time */
2721   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2722   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2723   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2724   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2725   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2726   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2727 
2728   if (!B->preallocated) {
2729     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2730     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2731     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2732     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2733     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2734   }
2735 
2736   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2737   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2738   B->preallocated  = PETSC_TRUE;
2739   B->was_assembled = PETSC_FALSE;
2740   B->assembled     = PETSC_FALSE;;
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2745 {
2746   Mat_MPIAIJ     *b;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2751   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2752   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2753   b = (Mat_MPIAIJ*)B->data;
2754 
2755 #if defined(PETSC_USE_CTABLE)
2756   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2757 #else
2758   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2759 #endif
2760   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2761   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2762   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2763 
2764   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2765   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2766   B->preallocated  = PETSC_TRUE;
2767   B->was_assembled = PETSC_FALSE;
2768   B->assembled = PETSC_FALSE;
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2773 {
2774   Mat            mat;
2775   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2776   PetscErrorCode ierr;
2777 
2778   PetscFunctionBegin;
2779   *newmat = 0;
2780   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2781   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2782   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2783   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2784   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2785   a       = (Mat_MPIAIJ*)mat->data;
2786 
2787   mat->factortype   = matin->factortype;
2788   mat->assembled    = PETSC_TRUE;
2789   mat->insertmode   = NOT_SET_VALUES;
2790   mat->preallocated = PETSC_TRUE;
2791 
2792   a->size         = oldmat->size;
2793   a->rank         = oldmat->rank;
2794   a->donotstash   = oldmat->donotstash;
2795   a->roworiented  = oldmat->roworiented;
2796   a->rowindices   = 0;
2797   a->rowvalues    = 0;
2798   a->getrowactive = PETSC_FALSE;
2799 
2800   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2802 
2803   if (oldmat->colmap) {
2804 #if defined(PETSC_USE_CTABLE)
2805     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2806 #else
2807     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2808     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2809     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2810 #endif
2811   } else a->colmap = 0;
2812   if (oldmat->garray) {
2813     PetscInt len;
2814     len  = oldmat->B->cmap->n;
2815     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2816     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2817     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2818   } else a->garray = 0;
2819 
2820   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2821   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2822   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2823   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2824 
2825   if (oldmat->Mvctx_mpi1) {
2826     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2827     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2828   }
2829 
2830   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2831   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2832   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2833   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2834   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2835   *newmat = mat;
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2840 {
2841   PetscScalar    *vals,*svals;
2842   MPI_Comm       comm;
2843   PetscErrorCode ierr;
2844   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2845   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2846   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2847   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2848   PetscInt       cend,cstart,n,*rowners;
2849   int            fd;
2850   PetscInt       bs = newMat->rmap->bs;
2851 
2852   PetscFunctionBegin;
2853   /* force binary viewer to load .info file if it has not yet done so */
2854   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2855   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2856   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2857   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2858   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2859   if (!rank) {
2860     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2861     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2862     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2863   }
2864 
2865   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2866   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2867   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2868   if (bs < 0) bs = 1;
2869 
2870   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2871   M    = header[1]; N = header[2];
2872 
2873   /* If global sizes are set, check if they are consistent with that given in the file */
2874   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2875   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2876 
2877   /* determine ownership of all (block) rows */
2878   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2879   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2880   else m = newMat->rmap->n; /* Set by user */
2881 
2882   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2883   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2884 
2885   /* First process needs enough room for process with most rows */
2886   if (!rank) {
2887     mmax = rowners[1];
2888     for (i=2; i<=size; i++) {
2889       mmax = PetscMax(mmax, rowners[i]);
2890     }
2891   } else mmax = -1;             /* unused, but compilers complain */
2892 
2893   rowners[0] = 0;
2894   for (i=2; i<=size; i++) {
2895     rowners[i] += rowners[i-1];
2896   }
2897   rstart = rowners[rank];
2898   rend   = rowners[rank+1];
2899 
2900   /* distribute row lengths to all processors */
2901   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2902   if (!rank) {
2903     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2904     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2905     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2906     for (j=0; j<m; j++) {
2907       procsnz[0] += ourlens[j];
2908     }
2909     for (i=1; i<size; i++) {
2910       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2911       /* calculate the number of nonzeros on each processor */
2912       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2913         procsnz[i] += rowlengths[j];
2914       }
2915       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2916     }
2917     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2918   } else {
2919     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2920   }
2921 
2922   if (!rank) {
2923     /* determine max buffer needed and allocate it */
2924     maxnz = 0;
2925     for (i=0; i<size; i++) {
2926       maxnz = PetscMax(maxnz,procsnz[i]);
2927     }
2928     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2929 
2930     /* read in my part of the matrix column indices  */
2931     nz   = procsnz[0];
2932     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2933     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2934 
2935     /* read in every one elses and ship off */
2936     for (i=1; i<size; i++) {
2937       nz   = procsnz[i];
2938       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2939       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2940     }
2941     ierr = PetscFree(cols);CHKERRQ(ierr);
2942   } else {
2943     /* determine buffer space needed for message */
2944     nz = 0;
2945     for (i=0; i<m; i++) {
2946       nz += ourlens[i];
2947     }
2948     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2949 
2950     /* receive message of column indices*/
2951     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2952   }
2953 
2954   /* determine column ownership if matrix is not square */
2955   if (N != M) {
2956     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2957     else n = newMat->cmap->n;
2958     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2959     cstart = cend - n;
2960   } else {
2961     cstart = rstart;
2962     cend   = rend;
2963     n      = cend - cstart;
2964   }
2965 
2966   /* loop over local rows, determining number of off diagonal entries */
2967   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2968   jj   = 0;
2969   for (i=0; i<m; i++) {
2970     for (j=0; j<ourlens[i]; j++) {
2971       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2972       jj++;
2973     }
2974   }
2975 
2976   for (i=0; i<m; i++) {
2977     ourlens[i] -= offlens[i];
2978   }
2979   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2980 
2981   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2982 
2983   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2984 
2985   for (i=0; i<m; i++) {
2986     ourlens[i] += offlens[i];
2987   }
2988 
2989   if (!rank) {
2990     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2991 
2992     /* read in my part of the matrix numerical values  */
2993     nz   = procsnz[0];
2994     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2995 
2996     /* insert into matrix */
2997     jj      = rstart;
2998     smycols = mycols;
2999     svals   = vals;
3000     for (i=0; i<m; i++) {
3001       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3002       smycols += ourlens[i];
3003       svals   += ourlens[i];
3004       jj++;
3005     }
3006 
3007     /* read in other processors and ship out */
3008     for (i=1; i<size; i++) {
3009       nz   = procsnz[i];
3010       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3011       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3012     }
3013     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3014   } else {
3015     /* receive numeric values */
3016     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3017 
3018     /* receive message of values*/
3019     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3020 
3021     /* insert into matrix */
3022     jj      = rstart;
3023     smycols = mycols;
3024     svals   = vals;
3025     for (i=0; i<m; i++) {
3026       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3027       smycols += ourlens[i];
3028       svals   += ourlens[i];
3029       jj++;
3030     }
3031   }
3032   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3033   ierr = PetscFree(vals);CHKERRQ(ierr);
3034   ierr = PetscFree(mycols);CHKERRQ(ierr);
3035   ierr = PetscFree(rowners);CHKERRQ(ierr);
3036   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3037   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3038   PetscFunctionReturn(0);
3039 }
3040 
3041 /* Not scalable because of ISAllGather() unless getting all columns. */
3042 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3043 {
3044   PetscErrorCode ierr;
3045   IS             iscol_local;
3046   PetscBool      isstride;
3047   PetscMPIInt    lisstride=0,gisstride;
3048 
3049   PetscFunctionBegin;
3050   /* check if we are grabbing all columns*/
3051   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3052 
3053   if (isstride) {
3054     PetscInt  start,len,mstart,mlen;
3055     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3056     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3057     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3058     if (mstart == start && mlen-mstart == len) lisstride = 1;
3059   }
3060 
3061   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3062   if (gisstride) {
3063     PetscInt N;
3064     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3065     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3066     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3067     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3068   } else {
3069     PetscInt cbs;
3070     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3071     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3072     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3073   }
3074 
3075   *isseq = iscol_local;
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 /*
3080  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3081  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3082 
3083  Input Parameters:
3084    mat - matrix
3085    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3086            i.e., mat->rstart <= isrow[i] < mat->rend
3087    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3088            i.e., mat->cstart <= iscol[i] < mat->cend
3089  Output Parameter:
3090    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3091    iscol_o - sequential column index set for retrieving mat->B
3092    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3093  */
3094 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3095 {
3096   PetscErrorCode ierr;
3097   Vec            x,cmap;
3098   const PetscInt *is_idx;
3099   PetscScalar    *xarray,*cmaparray;
3100   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3101   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3102   Mat            B=a->B;
3103   Vec            lvec=a->lvec,lcmap;
3104   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3105   MPI_Comm       comm;
3106   VecScatter     Mvctx=a->Mvctx;
3107 
3108   PetscFunctionBegin;
3109   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3110   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3111 
3112   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3113   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3114   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3115   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3116   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3117 
3118   /* Get start indices */
3119   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3120   isstart -= ncols;
3121   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3122 
3123   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3124   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3125   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3126   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3127   for (i=0; i<ncols; i++) {
3128     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3129     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3130     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3131   }
3132   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3133   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3134   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3135 
3136   /* Get iscol_d */
3137   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3138   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3139   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3140 
3141   /* Get isrow_d */
3142   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3143   rstart = mat->rmap->rstart;
3144   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3145   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3146   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3147   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3148 
3149   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3150   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3151   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3152 
3153   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3154   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3155   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3156 
3157   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3158 
3159   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3160   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3161 
3162   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3163   /* off-process column indices */
3164   count = 0;
3165   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3166   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3167 
3168   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3169   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3170   for (i=0; i<Bn; i++) {
3171     if (PetscRealPart(xarray[i]) > -1.0) {
3172       idx[count]     = i;                   /* local column index in off-diagonal part B */
3173       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3174       count++;
3175     }
3176   }
3177   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3178   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3179 
3180   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3181   /* cannot ensure iscol_o has same blocksize as iscol! */
3182 
3183   ierr = PetscFree(idx);CHKERRQ(ierr);
3184   *garray = cmap1;
3185 
3186   ierr = VecDestroy(&x);CHKERRQ(ierr);
3187   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3188   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3193 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3194 {
3195   PetscErrorCode ierr;
3196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3197   Mat            M = NULL;
3198   MPI_Comm       comm;
3199   IS             iscol_d,isrow_d,iscol_o;
3200   Mat            Asub = NULL,Bsub = NULL;
3201   PetscInt       n;
3202 
3203   PetscFunctionBegin;
3204   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3205 
3206   if (call == MAT_REUSE_MATRIX) {
3207     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3208     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3209     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3210 
3211     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3212     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3213 
3214     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3215     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3216 
3217     /* Update diagonal and off-diagonal portions of submat */
3218     asub = (Mat_MPIAIJ*)(*submat)->data;
3219     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3220     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3221     if (n) {
3222       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3223     }
3224     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226 
3227   } else { /* call == MAT_INITIAL_MATRIX) */
3228     const PetscInt *garray;
3229     PetscInt        BsubN;
3230 
3231     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3232     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3233 
3234     /* Create local submatrices Asub and Bsub */
3235     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3236     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3237 
3238     /* Create submatrix M */
3239     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3240 
3241     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3242     asub = (Mat_MPIAIJ*)M->data;
3243 
3244     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3245     n = asub->B->cmap->N;
3246     if (BsubN > n) {
3247       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3248       const PetscInt *idx;
3249       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3250       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3251 
3252       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3253       j = 0;
3254       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3255       for (i=0; i<n; i++) {
3256         if (j >= BsubN) break;
3257         while (subgarray[i] > garray[j]) j++;
3258 
3259         if (subgarray[i] == garray[j]) {
3260           idx_new[i] = idx[j++];
3261         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3262       }
3263       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3264 
3265       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3266       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3267 
3268     } else if (BsubN < n) {
3269       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3270     }
3271 
3272     ierr = PetscFree(garray);CHKERRQ(ierr);
3273     *submat = M;
3274 
3275     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3276     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3277     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3278 
3279     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3280     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3281 
3282     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3283     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3284   }
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3289 {
3290   PetscErrorCode ierr;
3291   IS             iscol_local=NULL,isrow_d;
3292   PetscInt       csize;
3293   PetscInt       n,i,j,start,end;
3294   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3295   MPI_Comm       comm;
3296 
3297   PetscFunctionBegin;
3298   /* If isrow has same processor distribution as mat,
3299      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3300   if (call == MAT_REUSE_MATRIX) {
3301     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3302     if (isrow_d) {
3303       sameRowDist  = PETSC_TRUE;
3304       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3305     } else {
3306       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3307       if (iscol_local) {
3308         sameRowDist  = PETSC_TRUE;
3309         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3310       }
3311     }
3312   } else {
3313     /* Check if isrow has same processor distribution as mat */
3314     sameDist[0] = PETSC_FALSE;
3315     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3316     if (!n) {
3317       sameDist[0] = PETSC_TRUE;
3318     } else {
3319       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3320       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3321       if (i >= start && j < end) {
3322         sameDist[0] = PETSC_TRUE;
3323       }
3324     }
3325 
3326     /* Check if iscol has same processor distribution as mat */
3327     sameDist[1] = PETSC_FALSE;
3328     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3329     if (!n) {
3330       sameDist[1] = PETSC_TRUE;
3331     } else {
3332       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3333       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3334       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3335     }
3336 
3337     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3338     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3339     sameRowDist = tsameDist[0];
3340   }
3341 
3342   if (sameRowDist) {
3343     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3344       /* isrow and iscol have same processor distribution as mat */
3345       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3346       PetscFunctionReturn(0);
3347     } else { /* sameRowDist */
3348       /* isrow has same processor distribution as mat */
3349       if (call == MAT_INITIAL_MATRIX) {
3350         PetscBool sorted;
3351         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3352         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3353         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3354         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3355 
3356         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3357         if (sorted) {
3358           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3359           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3360           PetscFunctionReturn(0);
3361         }
3362       } else { /* call == MAT_REUSE_MATRIX */
3363         IS    iscol_sub;
3364         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3365         if (iscol_sub) {
3366           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3367           PetscFunctionReturn(0);
3368         }
3369       }
3370     }
3371   }
3372 
3373   /* General case: iscol -> iscol_local which has global size of iscol */
3374   if (call == MAT_REUSE_MATRIX) {
3375     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3376     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3377   } else {
3378     if (!iscol_local) {
3379       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3380     }
3381   }
3382 
3383   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3384   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3385 
3386   if (call == MAT_INITIAL_MATRIX) {
3387     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3388     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3389   }
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 /*@C
3394      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3395          and "off-diagonal" part of the matrix in CSR format.
3396 
3397    Collective on MPI_Comm
3398 
3399    Input Parameters:
3400 +  comm - MPI communicator
3401 .  A - "diagonal" portion of matrix
3402 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3403 -  garray - global index of B columns
3404 
3405    Output Parameter:
3406 .   mat - the matrix, with input A as its local diagonal matrix
3407    Level: advanced
3408 
3409    Notes:
3410        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3411        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3412 
3413 .seealso: MatCreateMPIAIJWithSplitArrays()
3414 @*/
3415 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3416 {
3417   PetscErrorCode ierr;
3418   Mat_MPIAIJ     *maij;
3419   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3420   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3421   PetscScalar    *oa=b->a;
3422   Mat            Bnew;
3423   PetscInt       m,n,N;
3424 
3425   PetscFunctionBegin;
3426   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3427   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3428   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3429   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3430   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3431   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3432 
3433   /* Get global columns of mat */
3434   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3435 
3436   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3437   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3438   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3439   maij = (Mat_MPIAIJ*)(*mat)->data;
3440 
3441   (*mat)->preallocated = PETSC_TRUE;
3442 
3443   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3444   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3445 
3446   /* Set A as diagonal portion of *mat */
3447   maij->A = A;
3448 
3449   nz = oi[m];
3450   for (i=0; i<nz; i++) {
3451     col   = oj[i];
3452     oj[i] = garray[col];
3453   }
3454 
3455    /* Set Bnew as off-diagonal portion of *mat */
3456   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3457   bnew        = (Mat_SeqAIJ*)Bnew->data;
3458   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3459   maij->B     = Bnew;
3460 
3461   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3462 
3463   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3464   b->free_a       = PETSC_FALSE;
3465   b->free_ij      = PETSC_FALSE;
3466   ierr = MatDestroy(&B);CHKERRQ(ierr);
3467 
3468   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3469   bnew->free_a       = PETSC_TRUE;
3470   bnew->free_ij      = PETSC_TRUE;
3471 
3472   /* condense columns of maij->B */
3473   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3474   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3477   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3482 
3483 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3484 {
3485   PetscErrorCode ierr;
3486   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3487   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3488   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3489   Mat            M,Msub,B=a->B;
3490   MatScalar      *aa;
3491   Mat_SeqAIJ     *aij;
3492   PetscInt       *garray = a->garray,*colsub,Ncols;
3493   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3494   IS             iscol_sub,iscmap;
3495   const PetscInt *is_idx,*cmap;
3496   PetscBool      allcolumns=PETSC_FALSE;
3497   MPI_Comm       comm;
3498 
3499   PetscFunctionBegin;
3500   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3501 
3502   if (call == MAT_REUSE_MATRIX) {
3503     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3504     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3505     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3506 
3507     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3508     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3509 
3510     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3511     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3512 
3513     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3514 
3515   } else { /* call == MAT_INITIAL_MATRIX) */
3516     PetscBool flg;
3517 
3518     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3519     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3520 
3521     /* (1) iscol -> nonscalable iscol_local */
3522     /* Check for special case: each processor gets entire matrix columns */
3523     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3524     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3525     if (allcolumns) {
3526       iscol_sub = iscol_local;
3527       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3528       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3529 
3530     } else {
3531       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3532       PetscInt *idx,*cmap1,k;
3533       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3534       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3535       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3536       count = 0;
3537       k     = 0;
3538       for (i=0; i<Ncols; i++) {
3539         j = is_idx[i];
3540         if (j >= cstart && j < cend) {
3541           /* diagonal part of mat */
3542           idx[count]     = j;
3543           cmap1[count++] = i; /* column index in submat */
3544         } else if (Bn) {
3545           /* off-diagonal part of mat */
3546           if (j == garray[k]) {
3547             idx[count]     = j;
3548             cmap1[count++] = i;  /* column index in submat */
3549           } else if (j > garray[k]) {
3550             while (j > garray[k] && k < Bn-1) k++;
3551             if (j == garray[k]) {
3552               idx[count]     = j;
3553               cmap1[count++] = i; /* column index in submat */
3554             }
3555           }
3556         }
3557       }
3558       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3559 
3560       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3561       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3562       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3563 
3564       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3565     }
3566 
3567     /* (3) Create sequential Msub */
3568     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3569   }
3570 
3571   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3572   aij  = (Mat_SeqAIJ*)(Msub)->data;
3573   ii   = aij->i;
3574   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3575 
3576   /*
3577       m - number of local rows
3578       Ncols - number of columns (same on all processors)
3579       rstart - first row in new global matrix generated
3580   */
3581   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3582 
3583   if (call == MAT_INITIAL_MATRIX) {
3584     /* (4) Create parallel newmat */
3585     PetscMPIInt    rank,size;
3586     PetscInt       csize;
3587 
3588     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3589     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3590 
3591     /*
3592         Determine the number of non-zeros in the diagonal and off-diagonal
3593         portions of the matrix in order to do correct preallocation
3594     */
3595 
3596     /* first get start and end of "diagonal" columns */
3597     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3598     if (csize == PETSC_DECIDE) {
3599       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3600       if (mglobal == Ncols) { /* square matrix */
3601         nlocal = m;
3602       } else {
3603         nlocal = Ncols/size + ((Ncols % size) > rank);
3604       }
3605     } else {
3606       nlocal = csize;
3607     }
3608     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3609     rstart = rend - nlocal;
3610     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3611 
3612     /* next, compute all the lengths */
3613     jj    = aij->j;
3614     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3615     olens = dlens + m;
3616     for (i=0; i<m; i++) {
3617       jend = ii[i+1] - ii[i];
3618       olen = 0;
3619       dlen = 0;
3620       for (j=0; j<jend; j++) {
3621         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3622         else dlen++;
3623         jj++;
3624       }
3625       olens[i] = olen;
3626       dlens[i] = dlen;
3627     }
3628 
3629     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3630     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3631 
3632     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3633     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3634     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3635     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3636     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3637     ierr = PetscFree(dlens);CHKERRQ(ierr);
3638 
3639   } else { /* call == MAT_REUSE_MATRIX */
3640     M    = *newmat;
3641     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3642     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3643     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3644     /*
3645          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3646        rather than the slower MatSetValues().
3647     */
3648     M->was_assembled = PETSC_TRUE;
3649     M->assembled     = PETSC_FALSE;
3650   }
3651 
3652   /* (5) Set values of Msub to *newmat */
3653   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3654   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3655 
3656   jj   = aij->j;
3657   aa   = aij->a;
3658   for (i=0; i<m; i++) {
3659     row = rstart + i;
3660     nz  = ii[i+1] - ii[i];
3661     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3662     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3663     jj += nz; aa += nz;
3664   }
3665   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3666 
3667   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3668   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669 
3670   ierr = PetscFree(colsub);CHKERRQ(ierr);
3671 
3672   /* save Msub, iscol_sub and iscmap used in processor for next request */
3673   if (call ==  MAT_INITIAL_MATRIX) {
3674     *newmat = M;
3675     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3676     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3677 
3678     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3679     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3680 
3681     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3682     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3683 
3684     if (iscol_local) {
3685       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3686       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3687     }
3688   }
3689   PetscFunctionReturn(0);
3690 }
3691 
3692 /*
3693     Not great since it makes two copies of the submatrix, first an SeqAIJ
3694   in local and then by concatenating the local matrices the end result.
3695   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3696 
3697   Note: This requires a sequential iscol with all indices.
3698 */
3699 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3700 {
3701   PetscErrorCode ierr;
3702   PetscMPIInt    rank,size;
3703   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3704   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3705   Mat            M,Mreuse;
3706   MatScalar      *aa,*vwork;
3707   MPI_Comm       comm;
3708   Mat_SeqAIJ     *aij;
3709   PetscBool      colflag,allcolumns=PETSC_FALSE;
3710 
3711   PetscFunctionBegin;
3712   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3714   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3715 
3716   /* Check for special case: each processor gets entire matrix columns */
3717   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3718   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3719   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3720 
3721   if (call ==  MAT_REUSE_MATRIX) {
3722     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3723     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3724     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3725   } else {
3726     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3727   }
3728 
3729   /*
3730       m - number of local rows
3731       n - number of columns (same on all processors)
3732       rstart - first row in new global matrix generated
3733   */
3734   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3735   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3736   if (call == MAT_INITIAL_MATRIX) {
3737     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3738     ii  = aij->i;
3739     jj  = aij->j;
3740 
3741     /*
3742         Determine the number of non-zeros in the diagonal and off-diagonal
3743         portions of the matrix in order to do correct preallocation
3744     */
3745 
3746     /* first get start and end of "diagonal" columns */
3747     if (csize == PETSC_DECIDE) {
3748       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3749       if (mglobal == n) { /* square matrix */
3750         nlocal = m;
3751       } else {
3752         nlocal = n/size + ((n % size) > rank);
3753       }
3754     } else {
3755       nlocal = csize;
3756     }
3757     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3758     rstart = rend - nlocal;
3759     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3760 
3761     /* next, compute all the lengths */
3762     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3763     olens = dlens + m;
3764     for (i=0; i<m; i++) {
3765       jend = ii[i+1] - ii[i];
3766       olen = 0;
3767       dlen = 0;
3768       for (j=0; j<jend; j++) {
3769         if (*jj < rstart || *jj >= rend) olen++;
3770         else dlen++;
3771         jj++;
3772       }
3773       olens[i] = olen;
3774       dlens[i] = dlen;
3775     }
3776     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3777     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3778     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3779     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3780     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3781     ierr = PetscFree(dlens);CHKERRQ(ierr);
3782   } else {
3783     PetscInt ml,nl;
3784 
3785     M    = *newmat;
3786     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3787     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3788     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3789     /*
3790          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3791        rather than the slower MatSetValues().
3792     */
3793     M->was_assembled = PETSC_TRUE;
3794     M->assembled     = PETSC_FALSE;
3795   }
3796   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3797   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3798   ii   = aij->i;
3799   jj   = aij->j;
3800   aa   = aij->a;
3801   for (i=0; i<m; i++) {
3802     row   = rstart + i;
3803     nz    = ii[i+1] - ii[i];
3804     cwork = jj;     jj += nz;
3805     vwork = aa;     aa += nz;
3806     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3807   }
3808 
3809   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811   *newmat = M;
3812 
3813   /* save submatrix used in processor for next request */
3814   if (call ==  MAT_INITIAL_MATRIX) {
3815     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3816     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3817   }
3818   PetscFunctionReturn(0);
3819 }
3820 
3821 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3822 {
3823   PetscInt       m,cstart, cend,j,nnz,i,d;
3824   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3825   const PetscInt *JJ;
3826   PetscScalar    *values;
3827   PetscErrorCode ierr;
3828   PetscBool      nooffprocentries;
3829 
3830   PetscFunctionBegin;
3831   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3832 
3833   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3834   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3835   m      = B->rmap->n;
3836   cstart = B->cmap->rstart;
3837   cend   = B->cmap->rend;
3838   rstart = B->rmap->rstart;
3839 
3840   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3841 
3842 #if defined(PETSC_USE_DEBUG)
3843   for (i=0; i<m; i++) {
3844     nnz = Ii[i+1]- Ii[i];
3845     JJ  = J + Ii[i];
3846     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3847     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3848     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3849   }
3850 #endif
3851 
3852   for (i=0; i<m; i++) {
3853     nnz     = Ii[i+1]- Ii[i];
3854     JJ      = J + Ii[i];
3855     nnz_max = PetscMax(nnz_max,nnz);
3856     d       = 0;
3857     for (j=0; j<nnz; j++) {
3858       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3859     }
3860     d_nnz[i] = d;
3861     o_nnz[i] = nnz - d;
3862   }
3863   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3864   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3865 
3866   if (v) values = (PetscScalar*)v;
3867   else {
3868     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3869   }
3870 
3871   for (i=0; i<m; i++) {
3872     ii   = i + rstart;
3873     nnz  = Ii[i+1]- Ii[i];
3874     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3875   }
3876   nooffprocentries    = B->nooffprocentries;
3877   B->nooffprocentries = PETSC_TRUE;
3878   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880   B->nooffprocentries = nooffprocentries;
3881 
3882   if (!v) {
3883     ierr = PetscFree(values);CHKERRQ(ierr);
3884   }
3885   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 /*@
3890    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3891    (the default parallel PETSc format).
3892 
3893    Collective on MPI_Comm
3894 
3895    Input Parameters:
3896 +  B - the matrix
3897 .  i - the indices into j for the start of each local row (starts with zero)
3898 .  j - the column indices for each local row (starts with zero)
3899 -  v - optional values in the matrix
3900 
3901    Level: developer
3902 
3903    Notes:
3904        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3905      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3906      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3907 
3908        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3909 
3910        The format which is used for the sparse matrix input, is equivalent to a
3911     row-major ordering.. i.e for the following matrix, the input data expected is
3912     as shown
3913 
3914 $        1 0 0
3915 $        2 0 3     P0
3916 $       -------
3917 $        4 5 6     P1
3918 $
3919 $     Process0 [P0]: rows_owned=[0,1]
3920 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3921 $        j =  {0,0,2}  [size = 3]
3922 $        v =  {1,2,3}  [size = 3]
3923 $
3924 $     Process1 [P1]: rows_owned=[2]
3925 $        i =  {0,3}    [size = nrow+1  = 1+1]
3926 $        j =  {0,1,2}  [size = 3]
3927 $        v =  {4,5,6}  [size = 3]
3928 
3929 .keywords: matrix, aij, compressed row, sparse, parallel
3930 
3931 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3932           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3933 @*/
3934 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3935 {
3936   PetscErrorCode ierr;
3937 
3938   PetscFunctionBegin;
3939   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 /*@C
3944    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3945    (the default parallel PETSc format).  For good matrix assembly performance
3946    the user should preallocate the matrix storage by setting the parameters
3947    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3948    performance can be increased by more than a factor of 50.
3949 
3950    Collective on MPI_Comm
3951 
3952    Input Parameters:
3953 +  B - the matrix
3954 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3955            (same value is used for all local rows)
3956 .  d_nnz - array containing the number of nonzeros in the various rows of the
3957            DIAGONAL portion of the local submatrix (possibly different for each row)
3958            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3959            The size of this array is equal to the number of local rows, i.e 'm'.
3960            For matrices that will be factored, you must leave room for (and set)
3961            the diagonal entry even if it is zero.
3962 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3963            submatrix (same value is used for all local rows).
3964 -  o_nnz - array containing the number of nonzeros in the various rows of the
3965            OFF-DIAGONAL portion of the local submatrix (possibly different for
3966            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3967            structure. The size of this array is equal to the number
3968            of local rows, i.e 'm'.
3969 
3970    If the *_nnz parameter is given then the *_nz parameter is ignored
3971 
3972    The AIJ format (also called the Yale sparse matrix format or
3973    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3974    storage.  The stored row and column indices begin with zero.
3975    See Users-Manual: ch_mat for details.
3976 
3977    The parallel matrix is partitioned such that the first m0 rows belong to
3978    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3979    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3980 
3981    The DIAGONAL portion of the local submatrix of a processor can be defined
3982    as the submatrix which is obtained by extraction the part corresponding to
3983    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3984    first row that belongs to the processor, r2 is the last row belonging to
3985    the this processor, and c1-c2 is range of indices of the local part of a
3986    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3987    common case of a square matrix, the row and column ranges are the same and
3988    the DIAGONAL part is also square. The remaining portion of the local
3989    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3990 
3991    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3992 
3993    You can call MatGetInfo() to get information on how effective the preallocation was;
3994    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3995    You can also run with the option -info and look for messages with the string
3996    malloc in them to see if additional memory allocation was needed.
3997 
3998    Example usage:
3999 
4000    Consider the following 8x8 matrix with 34 non-zero values, that is
4001    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4002    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4003    as follows:
4004 
4005 .vb
4006             1  2  0  |  0  3  0  |  0  4
4007     Proc0   0  5  6  |  7  0  0  |  8  0
4008             9  0 10  | 11  0  0  | 12  0
4009     -------------------------------------
4010            13  0 14  | 15 16 17  |  0  0
4011     Proc1   0 18  0  | 19 20 21  |  0  0
4012             0  0  0  | 22 23  0  | 24  0
4013     -------------------------------------
4014     Proc2  25 26 27  |  0  0 28  | 29  0
4015            30  0  0  | 31 32 33  |  0 34
4016 .ve
4017 
4018    This can be represented as a collection of submatrices as:
4019 
4020 .vb
4021       A B C
4022       D E F
4023       G H I
4024 .ve
4025 
4026    Where the submatrices A,B,C are owned by proc0, D,E,F are
4027    owned by proc1, G,H,I are owned by proc2.
4028 
4029    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4030    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4031    The 'M','N' parameters are 8,8, and have the same values on all procs.
4032 
4033    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4034    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4035    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4036    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4037    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4038    matrix, ans [DF] as another SeqAIJ matrix.
4039 
4040    When d_nz, o_nz parameters are specified, d_nz storage elements are
4041    allocated for every row of the local diagonal submatrix, and o_nz
4042    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4043    One way to choose d_nz and o_nz is to use the max nonzerors per local
4044    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4045    In this case, the values of d_nz,o_nz are:
4046 .vb
4047      proc0 : dnz = 2, o_nz = 2
4048      proc1 : dnz = 3, o_nz = 2
4049      proc2 : dnz = 1, o_nz = 4
4050 .ve
4051    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4052    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4053    for proc3. i.e we are using 12+15+10=37 storage locations to store
4054    34 values.
4055 
4056    When d_nnz, o_nnz parameters are specified, the storage is specified
4057    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4058    In the above case the values for d_nnz,o_nnz are:
4059 .vb
4060      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4061      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4062      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4063 .ve
4064    Here the space allocated is sum of all the above values i.e 34, and
4065    hence pre-allocation is perfect.
4066 
4067    Level: intermediate
4068 
4069 .keywords: matrix, aij, compressed row, sparse, parallel
4070 
4071 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4072           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4073 @*/
4074 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4075 {
4076   PetscErrorCode ierr;
4077 
4078   PetscFunctionBegin;
4079   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4080   PetscValidType(B,1);
4081   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4082   PetscFunctionReturn(0);
4083 }
4084 
4085 /*@
4086      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4087          CSR format the local rows.
4088 
4089    Collective on MPI_Comm
4090 
4091    Input Parameters:
4092 +  comm - MPI communicator
4093 .  m - number of local rows (Cannot be PETSC_DECIDE)
4094 .  n - This value should be the same as the local size used in creating the
4095        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4096        calculated if N is given) For square matrices n is almost always m.
4097 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4098 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4099 .   i - row indices
4100 .   j - column indices
4101 -   a - matrix values
4102 
4103    Output Parameter:
4104 .   mat - the matrix
4105 
4106    Level: intermediate
4107 
4108    Notes:
4109        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4110      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4111      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4112 
4113        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4114 
4115        The format which is used for the sparse matrix input, is equivalent to a
4116     row-major ordering.. i.e for the following matrix, the input data expected is
4117     as shown
4118 
4119 $        1 0 0
4120 $        2 0 3     P0
4121 $       -------
4122 $        4 5 6     P1
4123 $
4124 $     Process0 [P0]: rows_owned=[0,1]
4125 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4126 $        j =  {0,0,2}  [size = 3]
4127 $        v =  {1,2,3}  [size = 3]
4128 $
4129 $     Process1 [P1]: rows_owned=[2]
4130 $        i =  {0,3}    [size = nrow+1  = 1+1]
4131 $        j =  {0,1,2}  [size = 3]
4132 $        v =  {4,5,6}  [size = 3]
4133 
4134 .keywords: matrix, aij, compressed row, sparse, parallel
4135 
4136 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4137           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4138 @*/
4139 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4140 {
4141   PetscErrorCode ierr;
4142 
4143   PetscFunctionBegin;
4144   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4145   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4146   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4147   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4148   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4149   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4150   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4151   PetscFunctionReturn(0);
4152 }
4153 
4154 /*@C
4155    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4156    (the default parallel PETSc format).  For good matrix assembly performance
4157    the user should preallocate the matrix storage by setting the parameters
4158    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4159    performance can be increased by more than a factor of 50.
4160 
4161    Collective on MPI_Comm
4162 
4163    Input Parameters:
4164 +  comm - MPI communicator
4165 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4166            This value should be the same as the local size used in creating the
4167            y vector for the matrix-vector product y = Ax.
4168 .  n - This value should be the same as the local size used in creating the
4169        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4170        calculated if N is given) For square matrices n is almost always m.
4171 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4172 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4173 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4174            (same value is used for all local rows)
4175 .  d_nnz - array containing the number of nonzeros in the various rows of the
4176            DIAGONAL portion of the local submatrix (possibly different for each row)
4177            or NULL, if d_nz is used to specify the nonzero structure.
4178            The size of this array is equal to the number of local rows, i.e 'm'.
4179 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4180            submatrix (same value is used for all local rows).
4181 -  o_nnz - array containing the number of nonzeros in the various rows of the
4182            OFF-DIAGONAL portion of the local submatrix (possibly different for
4183            each row) or NULL, if o_nz is used to specify the nonzero
4184            structure. The size of this array is equal to the number
4185            of local rows, i.e 'm'.
4186 
4187    Output Parameter:
4188 .  A - the matrix
4189 
4190    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4191    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4192    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4193 
4194    Notes:
4195    If the *_nnz parameter is given then the *_nz parameter is ignored
4196 
4197    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4198    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4199    storage requirements for this matrix.
4200 
4201    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4202    processor than it must be used on all processors that share the object for
4203    that argument.
4204 
4205    The user MUST specify either the local or global matrix dimensions
4206    (possibly both).
4207 
4208    The parallel matrix is partitioned across processors such that the
4209    first m0 rows belong to process 0, the next m1 rows belong to
4210    process 1, the next m2 rows belong to process 2 etc.. where
4211    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4212    values corresponding to [m x N] submatrix.
4213 
4214    The columns are logically partitioned with the n0 columns belonging
4215    to 0th partition, the next n1 columns belonging to the next
4216    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4217 
4218    The DIAGONAL portion of the local submatrix on any given processor
4219    is the submatrix corresponding to the rows and columns m,n
4220    corresponding to the given processor. i.e diagonal matrix on
4221    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4222    etc. The remaining portion of the local submatrix [m x (N-n)]
4223    constitute the OFF-DIAGONAL portion. The example below better
4224    illustrates this concept.
4225 
4226    For a square global matrix we define each processor's diagonal portion
4227    to be its local rows and the corresponding columns (a square submatrix);
4228    each processor's off-diagonal portion encompasses the remainder of the
4229    local matrix (a rectangular submatrix).
4230 
4231    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4232 
4233    When calling this routine with a single process communicator, a matrix of
4234    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4235    type of communicator, use the construction mechanism
4236 .vb
4237      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4238 .ve
4239 
4240 $     MatCreate(...,&A);
4241 $     MatSetType(A,MATMPIAIJ);
4242 $     MatSetSizes(A, m,n,M,N);
4243 $     MatMPIAIJSetPreallocation(A,...);
4244 
4245    By default, this format uses inodes (identical nodes) when possible.
4246    We search for consecutive rows with the same nonzero structure, thereby
4247    reusing matrix information to achieve increased efficiency.
4248 
4249    Options Database Keys:
4250 +  -mat_no_inode  - Do not use inodes
4251 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4252 
4253 
4254 
4255    Example usage:
4256 
4257    Consider the following 8x8 matrix with 34 non-zero values, that is
4258    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4259    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4260    as follows
4261 
4262 .vb
4263             1  2  0  |  0  3  0  |  0  4
4264     Proc0   0  5  6  |  7  0  0  |  8  0
4265             9  0 10  | 11  0  0  | 12  0
4266     -------------------------------------
4267            13  0 14  | 15 16 17  |  0  0
4268     Proc1   0 18  0  | 19 20 21  |  0  0
4269             0  0  0  | 22 23  0  | 24  0
4270     -------------------------------------
4271     Proc2  25 26 27  |  0  0 28  | 29  0
4272            30  0  0  | 31 32 33  |  0 34
4273 .ve
4274 
4275    This can be represented as a collection of submatrices as
4276 
4277 .vb
4278       A B C
4279       D E F
4280       G H I
4281 .ve
4282 
4283    Where the submatrices A,B,C are owned by proc0, D,E,F are
4284    owned by proc1, G,H,I are owned by proc2.
4285 
4286    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4287    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4288    The 'M','N' parameters are 8,8, and have the same values on all procs.
4289 
4290    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4291    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4292    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4293    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4294    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4295    matrix, ans [DF] as another SeqAIJ matrix.
4296 
4297    When d_nz, o_nz parameters are specified, d_nz storage elements are
4298    allocated for every row of the local diagonal submatrix, and o_nz
4299    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4300    One way to choose d_nz and o_nz is to use the max nonzerors per local
4301    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4302    In this case, the values of d_nz,o_nz are
4303 .vb
4304      proc0 : dnz = 2, o_nz = 2
4305      proc1 : dnz = 3, o_nz = 2
4306      proc2 : dnz = 1, o_nz = 4
4307 .ve
4308    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4309    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4310    for proc3. i.e we are using 12+15+10=37 storage locations to store
4311    34 values.
4312 
4313    When d_nnz, o_nnz parameters are specified, the storage is specified
4314    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4315    In the above case the values for d_nnz,o_nnz are
4316 .vb
4317      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4318      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4319      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4320 .ve
4321    Here the space allocated is sum of all the above values i.e 34, and
4322    hence pre-allocation is perfect.
4323 
4324    Level: intermediate
4325 
4326 .keywords: matrix, aij, compressed row, sparse, parallel
4327 
4328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4329           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4330 @*/
4331 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4332 {
4333   PetscErrorCode ierr;
4334   PetscMPIInt    size;
4335 
4336   PetscFunctionBegin;
4337   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4338   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4339   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4340   if (size > 1) {
4341     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4342     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4343   } else {
4344     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4345     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4346   }
4347   PetscFunctionReturn(0);
4348 }
4349 
4350 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4351 {
4352   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4353   PetscBool      flg;
4354   PetscErrorCode ierr;
4355 
4356   PetscFunctionBegin;
4357   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4358   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4359   if (Ad)     *Ad     = a->A;
4360   if (Ao)     *Ao     = a->B;
4361   if (colmap) *colmap = a->garray;
4362   PetscFunctionReturn(0);
4363 }
4364 
4365 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4366 {
4367   PetscErrorCode ierr;
4368   PetscInt       m,N,i,rstart,nnz,Ii;
4369   PetscInt       *indx;
4370   PetscScalar    *values;
4371 
4372   PetscFunctionBegin;
4373   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4374   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4375     PetscInt       *dnz,*onz,sum,bs,cbs;
4376 
4377     if (n == PETSC_DECIDE) {
4378       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4379     }
4380     /* Check sum(n) = N */
4381     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4382     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4383 
4384     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4385     rstart -= m;
4386 
4387     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4388     for (i=0; i<m; i++) {
4389       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4390       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4391       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4392     }
4393 
4394     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4395     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4396     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4397     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4398     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4399     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4400     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4401     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4402   }
4403 
4404   /* numeric phase */
4405   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4406   for (i=0; i<m; i++) {
4407     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4408     Ii   = i + rstart;
4409     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4410     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4411   }
4412   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4418 {
4419   PetscErrorCode    ierr;
4420   PetscMPIInt       rank;
4421   PetscInt          m,N,i,rstart,nnz;
4422   size_t            len;
4423   const PetscInt    *indx;
4424   PetscViewer       out;
4425   char              *name;
4426   Mat               B;
4427   const PetscScalar *values;
4428 
4429   PetscFunctionBegin;
4430   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4431   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4432   /* Should this be the type of the diagonal block of A? */
4433   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4434   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4435   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4436   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4437   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4438   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4439   for (i=0; i<m; i++) {
4440     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4441     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4442     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4443   }
4444   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4445   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4446 
4447   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4448   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4449   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4450   sprintf(name,"%s.%d",outfile,rank);
4451   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4452   ierr = PetscFree(name);CHKERRQ(ierr);
4453   ierr = MatView(B,out);CHKERRQ(ierr);
4454   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4455   ierr = MatDestroy(&B);CHKERRQ(ierr);
4456   PetscFunctionReturn(0);
4457 }
4458 
4459 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4460 {
4461   PetscErrorCode      ierr;
4462   Mat_Merge_SeqsToMPI *merge;
4463   PetscContainer      container;
4464 
4465   PetscFunctionBegin;
4466   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4467   if (container) {
4468     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4481     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4482     ierr = PetscFree(merge);CHKERRQ(ierr);
4483     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4484   }
4485   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4486   PetscFunctionReturn(0);
4487 }
4488 
4489 #include <../src/mat/utils/freespace.h>
4490 #include <petscbt.h>
4491 
4492 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4493 {
4494   PetscErrorCode      ierr;
4495   MPI_Comm            comm;
4496   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4497   PetscMPIInt         size,rank,taga,*len_s;
4498   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4499   PetscInt            proc,m;
4500   PetscInt            **buf_ri,**buf_rj;
4501   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4502   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4503   MPI_Request         *s_waits,*r_waits;
4504   MPI_Status          *status;
4505   MatScalar           *aa=a->a;
4506   MatScalar           **abuf_r,*ba_i;
4507   Mat_Merge_SeqsToMPI *merge;
4508   PetscContainer      container;
4509 
4510   PetscFunctionBegin;
4511   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4512   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4513 
4514   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4515   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4516 
4517   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4518   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4519 
4520   bi     = merge->bi;
4521   bj     = merge->bj;
4522   buf_ri = merge->buf_ri;
4523   buf_rj = merge->buf_rj;
4524 
4525   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4526   owners = merge->rowmap->range;
4527   len_s  = merge->len_s;
4528 
4529   /* send and recv matrix values */
4530   /*-----------------------------*/
4531   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4532   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4533 
4534   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4535   for (proc=0,k=0; proc<size; proc++) {
4536     if (!len_s[proc]) continue;
4537     i    = owners[proc];
4538     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4539     k++;
4540   }
4541 
4542   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4543   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4544   ierr = PetscFree(status);CHKERRQ(ierr);
4545 
4546   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4547   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4548 
4549   /* insert mat values of mpimat */
4550   /*----------------------------*/
4551   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4552   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4553 
4554   for (k=0; k<merge->nrecv; k++) {
4555     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4556     nrows       = *(buf_ri_k[k]);
4557     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4558     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4559   }
4560 
4561   /* set values of ba */
4562   m = merge->rowmap->n;
4563   for (i=0; i<m; i++) {
4564     arow = owners[rank] + i;
4565     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4566     bnzi = bi[i+1] - bi[i];
4567     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4568 
4569     /* add local non-zero vals of this proc's seqmat into ba */
4570     anzi   = ai[arow+1] - ai[arow];
4571     aj     = a->j + ai[arow];
4572     aa     = a->a + ai[arow];
4573     nextaj = 0;
4574     for (j=0; nextaj<anzi; j++) {
4575       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4576         ba_i[j] += aa[nextaj++];
4577       }
4578     }
4579 
4580     /* add received vals into ba */
4581     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4582       /* i-th row */
4583       if (i == *nextrow[k]) {
4584         anzi   = *(nextai[k]+1) - *nextai[k];
4585         aj     = buf_rj[k] + *(nextai[k]);
4586         aa     = abuf_r[k] + *(nextai[k]);
4587         nextaj = 0;
4588         for (j=0; nextaj<anzi; j++) {
4589           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4590             ba_i[j] += aa[nextaj++];
4591           }
4592         }
4593         nextrow[k]++; nextai[k]++;
4594       }
4595     }
4596     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4597   }
4598   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4599   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4600 
4601   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4602   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4603   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4604   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4605   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4606   PetscFunctionReturn(0);
4607 }
4608 
4609 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4610 {
4611   PetscErrorCode      ierr;
4612   Mat                 B_mpi;
4613   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4614   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4615   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4616   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4617   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4618   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4619   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4620   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4621   MPI_Status          *status;
4622   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4623   PetscBT             lnkbt;
4624   Mat_Merge_SeqsToMPI *merge;
4625   PetscContainer      container;
4626 
4627   PetscFunctionBegin;
4628   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4629 
4630   /* make sure it is a PETSc comm */
4631   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4632   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4633   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4634 
4635   ierr = PetscNew(&merge);CHKERRQ(ierr);
4636   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4637 
4638   /* determine row ownership */
4639   /*---------------------------------------------------------*/
4640   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4641   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4642   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4643   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4644   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4645   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4646   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4647 
4648   m      = merge->rowmap->n;
4649   owners = merge->rowmap->range;
4650 
4651   /* determine the number of messages to send, their lengths */
4652   /*---------------------------------------------------------*/
4653   len_s = merge->len_s;
4654 
4655   len          = 0; /* length of buf_si[] */
4656   merge->nsend = 0;
4657   for (proc=0; proc<size; proc++) {
4658     len_si[proc] = 0;
4659     if (proc == rank) {
4660       len_s[proc] = 0;
4661     } else {
4662       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4663       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4664     }
4665     if (len_s[proc]) {
4666       merge->nsend++;
4667       nrows = 0;
4668       for (i=owners[proc]; i<owners[proc+1]; i++) {
4669         if (ai[i+1] > ai[i]) nrows++;
4670       }
4671       len_si[proc] = 2*(nrows+1);
4672       len         += len_si[proc];
4673     }
4674   }
4675 
4676   /* determine the number and length of messages to receive for ij-structure */
4677   /*-------------------------------------------------------------------------*/
4678   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4679   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4680 
4681   /* post the Irecv of j-structure */
4682   /*-------------------------------*/
4683   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4684   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4685 
4686   /* post the Isend of j-structure */
4687   /*--------------------------------*/
4688   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4689 
4690   for (proc=0, k=0; proc<size; proc++) {
4691     if (!len_s[proc]) continue;
4692     i    = owners[proc];
4693     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4694     k++;
4695   }
4696 
4697   /* receives and sends of j-structure are complete */
4698   /*------------------------------------------------*/
4699   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4700   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4701 
4702   /* send and recv i-structure */
4703   /*---------------------------*/
4704   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4705   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4706 
4707   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4708   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4709   for (proc=0,k=0; proc<size; proc++) {
4710     if (!len_s[proc]) continue;
4711     /* form outgoing message for i-structure:
4712          buf_si[0]:                 nrows to be sent
4713                [1:nrows]:           row index (global)
4714                [nrows+1:2*nrows+1]: i-structure index
4715     */
4716     /*-------------------------------------------*/
4717     nrows       = len_si[proc]/2 - 1;
4718     buf_si_i    = buf_si + nrows+1;
4719     buf_si[0]   = nrows;
4720     buf_si_i[0] = 0;
4721     nrows       = 0;
4722     for (i=owners[proc]; i<owners[proc+1]; i++) {
4723       anzi = ai[i+1] - ai[i];
4724       if (anzi) {
4725         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4726         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4727         nrows++;
4728       }
4729     }
4730     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4731     k++;
4732     buf_si += len_si[proc];
4733   }
4734 
4735   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4736   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4737 
4738   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4739   for (i=0; i<merge->nrecv; i++) {
4740     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4741   }
4742 
4743   ierr = PetscFree(len_si);CHKERRQ(ierr);
4744   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4745   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4746   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4747   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4748   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4749   ierr = PetscFree(status);CHKERRQ(ierr);
4750 
4751   /* compute a local seq matrix in each processor */
4752   /*----------------------------------------------*/
4753   /* allocate bi array and free space for accumulating nonzero column info */
4754   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4755   bi[0] = 0;
4756 
4757   /* create and initialize a linked list */
4758   nlnk = N+1;
4759   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4760 
4761   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4762   len  = ai[owners[rank+1]] - ai[owners[rank]];
4763   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4764 
4765   current_space = free_space;
4766 
4767   /* determine symbolic info for each local row */
4768   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4769 
4770   for (k=0; k<merge->nrecv; k++) {
4771     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4772     nrows       = *buf_ri_k[k];
4773     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4774     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4775   }
4776 
4777   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4778   len  = 0;
4779   for (i=0; i<m; i++) {
4780     bnzi = 0;
4781     /* add local non-zero cols of this proc's seqmat into lnk */
4782     arow  = owners[rank] + i;
4783     anzi  = ai[arow+1] - ai[arow];
4784     aj    = a->j + ai[arow];
4785     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786     bnzi += nlnk;
4787     /* add received col data into lnk */
4788     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4789       if (i == *nextrow[k]) { /* i-th row */
4790         anzi  = *(nextai[k]+1) - *nextai[k];
4791         aj    = buf_rj[k] + *nextai[k];
4792         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4793         bnzi += nlnk;
4794         nextrow[k]++; nextai[k]++;
4795       }
4796     }
4797     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4798 
4799     /* if free space is not available, make more free space */
4800     if (current_space->local_remaining<bnzi) {
4801       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4802       nspacedouble++;
4803     }
4804     /* copy data into free space, then initialize lnk */
4805     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4806     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4807 
4808     current_space->array           += bnzi;
4809     current_space->local_used      += bnzi;
4810     current_space->local_remaining -= bnzi;
4811 
4812     bi[i+1] = bi[i] + bnzi;
4813   }
4814 
4815   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4816 
4817   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4818   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4819   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4820 
4821   /* create symbolic parallel matrix B_mpi */
4822   /*---------------------------------------*/
4823   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4824   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4825   if (n==PETSC_DECIDE) {
4826     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4827   } else {
4828     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4829   }
4830   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4831   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4832   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4833   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4834   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4835 
4836   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4837   B_mpi->assembled    = PETSC_FALSE;
4838   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4839   merge->bi           = bi;
4840   merge->bj           = bj;
4841   merge->buf_ri       = buf_ri;
4842   merge->buf_rj       = buf_rj;
4843   merge->coi          = NULL;
4844   merge->coj          = NULL;
4845   merge->owners_co    = NULL;
4846 
4847   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4848 
4849   /* attach the supporting struct to B_mpi for reuse */
4850   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4851   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4852   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4853   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4854   *mpimat = B_mpi;
4855 
4856   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4857   PetscFunctionReturn(0);
4858 }
4859 
4860 /*@C
4861       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4862                  matrices from each processor
4863 
4864     Collective on MPI_Comm
4865 
4866    Input Parameters:
4867 +    comm - the communicators the parallel matrix will live on
4868 .    seqmat - the input sequential matrices
4869 .    m - number of local rows (or PETSC_DECIDE)
4870 .    n - number of local columns (or PETSC_DECIDE)
4871 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4872 
4873    Output Parameter:
4874 .    mpimat - the parallel matrix generated
4875 
4876     Level: advanced
4877 
4878    Notes:
4879      The dimensions of the sequential matrix in each processor MUST be the same.
4880      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4881      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4882 @*/
4883 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4884 {
4885   PetscErrorCode ierr;
4886   PetscMPIInt    size;
4887 
4888   PetscFunctionBegin;
4889   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4890   if (size == 1) {
4891     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4892     if (scall == MAT_INITIAL_MATRIX) {
4893       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4894     } else {
4895       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4896     }
4897     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4898     PetscFunctionReturn(0);
4899   }
4900   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901   if (scall == MAT_INITIAL_MATRIX) {
4902     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4903   }
4904   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4905   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4906   PetscFunctionReturn(0);
4907 }
4908 
4909 /*@
4910      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4911           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4912           with MatGetSize()
4913 
4914     Not Collective
4915 
4916    Input Parameters:
4917 +    A - the matrix
4918 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4919 
4920    Output Parameter:
4921 .    A_loc - the local sequential matrix generated
4922 
4923     Level: developer
4924 
4925 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4926 
4927 @*/
4928 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4929 {
4930   PetscErrorCode ierr;
4931   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4932   Mat_SeqAIJ     *mat,*a,*b;
4933   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4934   MatScalar      *aa,*ba,*cam;
4935   PetscScalar    *ca;
4936   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4937   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4938   PetscBool      match;
4939   MPI_Comm       comm;
4940   PetscMPIInt    size;
4941 
4942   PetscFunctionBegin;
4943   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4944   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4945   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4946   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4947   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4948 
4949   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4950   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4951   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4952   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4953   aa = a->a; ba = b->a;
4954   if (scall == MAT_INITIAL_MATRIX) {
4955     if (size == 1) {
4956       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4957       PetscFunctionReturn(0);
4958     }
4959 
4960     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4961     ci[0] = 0;
4962     for (i=0; i<am; i++) {
4963       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4964     }
4965     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4966     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4967     k    = 0;
4968     for (i=0; i<am; i++) {
4969       ncols_o = bi[i+1] - bi[i];
4970       ncols_d = ai[i+1] - ai[i];
4971       /* off-diagonal portion of A */
4972       for (jo=0; jo<ncols_o; jo++) {
4973         col = cmap[*bj];
4974         if (col >= cstart) break;
4975         cj[k]   = col; bj++;
4976         ca[k++] = *ba++;
4977       }
4978       /* diagonal portion of A */
4979       for (j=0; j<ncols_d; j++) {
4980         cj[k]   = cstart + *aj++;
4981         ca[k++] = *aa++;
4982       }
4983       /* off-diagonal portion of A */
4984       for (j=jo; j<ncols_o; j++) {
4985         cj[k]   = cmap[*bj++];
4986         ca[k++] = *ba++;
4987       }
4988     }
4989     /* put together the new matrix */
4990     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4991     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4992     /* Since these are PETSc arrays, change flags to free them as necessary. */
4993     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4994     mat->free_a  = PETSC_TRUE;
4995     mat->free_ij = PETSC_TRUE;
4996     mat->nonew   = 0;
4997   } else if (scall == MAT_REUSE_MATRIX) {
4998     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4999     ci = mat->i; cj = mat->j; cam = mat->a;
5000     for (i=0; i<am; i++) {
5001       /* off-diagonal portion of A */
5002       ncols_o = bi[i+1] - bi[i];
5003       for (jo=0; jo<ncols_o; jo++) {
5004         col = cmap[*bj];
5005         if (col >= cstart) break;
5006         *cam++ = *ba++; bj++;
5007       }
5008       /* diagonal portion of A */
5009       ncols_d = ai[i+1] - ai[i];
5010       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5011       /* off-diagonal portion of A */
5012       for (j=jo; j<ncols_o; j++) {
5013         *cam++ = *ba++; bj++;
5014       }
5015     }
5016   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5017   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5018   PetscFunctionReturn(0);
5019 }
5020 
5021 /*@C
5022      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5023 
5024     Not Collective
5025 
5026    Input Parameters:
5027 +    A - the matrix
5028 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 -    row, col - index sets of rows and columns to extract (or NULL)
5030 
5031    Output Parameter:
5032 .    A_loc - the local sequential matrix generated
5033 
5034     Level: developer
5035 
5036 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5037 
5038 @*/
5039 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5040 {
5041   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5042   PetscErrorCode ierr;
5043   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5044   IS             isrowa,iscola;
5045   Mat            *aloc;
5046   PetscBool      match;
5047 
5048   PetscFunctionBegin;
5049   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5050   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5051   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5052   if (!row) {
5053     start = A->rmap->rstart; end = A->rmap->rend;
5054     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5055   } else {
5056     isrowa = *row;
5057   }
5058   if (!col) {
5059     start = A->cmap->rstart;
5060     cmap  = a->garray;
5061     nzA   = a->A->cmap->n;
5062     nzB   = a->B->cmap->n;
5063     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5064     ncols = 0;
5065     for (i=0; i<nzB; i++) {
5066       if (cmap[i] < start) idx[ncols++] = cmap[i];
5067       else break;
5068     }
5069     imark = i;
5070     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5071     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5072     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5073   } else {
5074     iscola = *col;
5075   }
5076   if (scall != MAT_INITIAL_MATRIX) {
5077     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5078     aloc[0] = *A_loc;
5079   }
5080   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5081   if (!col) { /* attach global id of condensed columns */
5082     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5083   }
5084   *A_loc = aloc[0];
5085   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5086   if (!row) {
5087     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5088   }
5089   if (!col) {
5090     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5091   }
5092   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5093   PetscFunctionReturn(0);
5094 }
5095 
5096 /*@C
5097     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5098 
5099     Collective on Mat
5100 
5101    Input Parameters:
5102 +    A,B - the matrices in mpiaij format
5103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5105 
5106    Output Parameter:
5107 +    rowb, colb - index sets of rows and columns of B to extract
5108 -    B_seq - the sequential matrix generated
5109 
5110     Level: developer
5111 
5112 @*/
5113 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5114 {
5115   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5116   PetscErrorCode ierr;
5117   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5118   IS             isrowb,iscolb;
5119   Mat            *bseq=NULL;
5120 
5121   PetscFunctionBegin;
5122   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5123     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5124   }
5125   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5126 
5127   if (scall == MAT_INITIAL_MATRIX) {
5128     start = A->cmap->rstart;
5129     cmap  = a->garray;
5130     nzA   = a->A->cmap->n;
5131     nzB   = a->B->cmap->n;
5132     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5133     ncols = 0;
5134     for (i=0; i<nzB; i++) {  /* row < local row index */
5135       if (cmap[i] < start) idx[ncols++] = cmap[i];
5136       else break;
5137     }
5138     imark = i;
5139     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5140     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5141     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5142     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5143   } else {
5144     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5145     isrowb  = *rowb; iscolb = *colb;
5146     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5147     bseq[0] = *B_seq;
5148   }
5149   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5150   *B_seq = bseq[0];
5151   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5152   if (!rowb) {
5153     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5154   } else {
5155     *rowb = isrowb;
5156   }
5157   if (!colb) {
5158     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5159   } else {
5160     *colb = iscolb;
5161   }
5162   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5163   PetscFunctionReturn(0);
5164 }
5165 
5166 /*
5167     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5168     of the OFF-DIAGONAL portion of local A
5169 
5170     Collective on Mat
5171 
5172    Input Parameters:
5173 +    A,B - the matrices in mpiaij format
5174 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5175 
5176    Output Parameter:
5177 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5178 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5179 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5180 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5181 
5182     Level: developer
5183 
5184 */
5185 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5186 {
5187   VecScatter_MPI_General *gen_to,*gen_from;
5188   PetscErrorCode         ierr;
5189   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5190   Mat_SeqAIJ             *b_oth;
5191   VecScatter             ctx;
5192   MPI_Comm               comm;
5193   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5194   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5195   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5196   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5197   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5198   MPI_Request            *rwaits = NULL,*swaits = NULL;
5199   MPI_Status             *sstatus,rstatus;
5200   PetscMPIInt            jj,size;
5201   VecScatterType         type;
5202   PetscBool              mpi1;
5203 
5204   PetscFunctionBegin;
5205   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5206   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5207 
5208   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5209     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5210   }
5211   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5212   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5213 
5214   if (size == 1) {
5215     startsj_s = NULL;
5216     bufa_ptr  = NULL;
5217     *B_oth    = NULL;
5218     PetscFunctionReturn(0);
5219   }
5220 
5221   ctx = a->Mvctx;
5222   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5223   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5224   if (!mpi1) {
5225     /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops,
5226      thus create a->Mvctx_mpi1 */
5227     if (!a->Mvctx_mpi1) {
5228       a->Mvctx_mpi1_flg = PETSC_TRUE;
5229       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5230     }
5231     ctx = a->Mvctx_mpi1;
5232   }
5233   tag = ((PetscObject)ctx)->tag;
5234 
5235   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5236   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5237   nrecvs   = gen_from->n;
5238   nsends   = gen_to->n;
5239 
5240   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5241   srow    = gen_to->indices;    /* local row index to be sent */
5242   sstarts = gen_to->starts;
5243   sprocs  = gen_to->procs;
5244   sstatus = gen_to->sstatus;
5245   sbs     = gen_to->bs;
5246   rstarts = gen_from->starts;
5247   rprocs  = gen_from->procs;
5248   rbs     = gen_from->bs;
5249 
5250   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5251   if (scall == MAT_INITIAL_MATRIX) {
5252     /* i-array */
5253     /*---------*/
5254     /*  post receives */
5255     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5256     for (i=0; i<nrecvs; i++) {
5257       rowlen = rvalues + rstarts[i]*rbs;
5258       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5259       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5260     }
5261 
5262     /* pack the outgoing message */
5263     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5264 
5265     sstartsj[0] = 0;
5266     rstartsj[0] = 0;
5267     len         = 0; /* total length of j or a array to be sent */
5268     k           = 0;
5269     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5270     for (i=0; i<nsends; i++) {
5271       rowlen = svalues + sstarts[i]*sbs;
5272       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5273       for (j=0; j<nrows; j++) {
5274         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5275         for (l=0; l<sbs; l++) {
5276           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5277 
5278           rowlen[j*sbs+l] = ncols;
5279 
5280           len += ncols;
5281           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5282         }
5283         k++;
5284       }
5285       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286 
5287       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5288     }
5289     /* recvs and sends of i-array are completed */
5290     i = nrecvs;
5291     while (i--) {
5292       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5293     }
5294     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5295     ierr = PetscFree(svalues);CHKERRQ(ierr);
5296 
5297     /* allocate buffers for sending j and a arrays */
5298     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5299     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5300 
5301     /* create i-array of B_oth */
5302     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5303 
5304     b_othi[0] = 0;
5305     len       = 0; /* total length of j or a array to be received */
5306     k         = 0;
5307     for (i=0; i<nrecvs; i++) {
5308       rowlen = rvalues + rstarts[i]*rbs;
5309       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5310       for (j=0; j<nrows; j++) {
5311         b_othi[k+1] = b_othi[k] + rowlen[j];
5312         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5313         k++;
5314       }
5315       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5316     }
5317     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5318 
5319     /* allocate space for j and a arrrays of B_oth */
5320     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5321     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5322 
5323     /* j-array */
5324     /*---------*/
5325     /*  post receives of j-array */
5326     for (i=0; i<nrecvs; i++) {
5327       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5328       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* pack the outgoing message j-array */
5332     k = 0;
5333     for (i=0; i<nsends; i++) {
5334       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335       bufJ  = bufj+sstartsj[i];
5336       for (j=0; j<nrows; j++) {
5337         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5338         for (ll=0; ll<sbs; ll++) {
5339           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5340           for (l=0; l<ncols; l++) {
5341             *bufJ++ = cols[l];
5342           }
5343           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5344         }
5345       }
5346       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5347     }
5348 
5349     /* recvs and sends of j-array are completed */
5350     i = nrecvs;
5351     while (i--) {
5352       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5353     }
5354     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5355   } else if (scall == MAT_REUSE_MATRIX) {
5356     sstartsj = *startsj_s;
5357     rstartsj = *startsj_r;
5358     bufa     = *bufa_ptr;
5359     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5360     b_otha   = b_oth->a;
5361   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5362 
5363   /* a-array */
5364   /*---------*/
5365   /*  post receives of a-array */
5366   for (i=0; i<nrecvs; i++) {
5367     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5368     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5369   }
5370 
5371   /* pack the outgoing message a-array */
5372   k = 0;
5373   for (i=0; i<nsends; i++) {
5374     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5375     bufA  = bufa+sstartsj[i];
5376     for (j=0; j<nrows; j++) {
5377       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5378       for (ll=0; ll<sbs; ll++) {
5379         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5380         for (l=0; l<ncols; l++) {
5381           *bufA++ = vals[l];
5382         }
5383         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5384       }
5385     }
5386     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5387   }
5388   /* recvs and sends of a-array are completed */
5389   i = nrecvs;
5390   while (i--) {
5391     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5392   }
5393   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5394   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5395 
5396   if (scall == MAT_INITIAL_MATRIX) {
5397     /* put together the new matrix */
5398     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5399 
5400     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5401     /* Since these are PETSc arrays, change flags to free them as necessary. */
5402     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5403     b_oth->free_a  = PETSC_TRUE;
5404     b_oth->free_ij = PETSC_TRUE;
5405     b_oth->nonew   = 0;
5406 
5407     ierr = PetscFree(bufj);CHKERRQ(ierr);
5408     if (!startsj_s || !bufa_ptr) {
5409       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5410       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5411     } else {
5412       *startsj_s = sstartsj;
5413       *startsj_r = rstartsj;
5414       *bufa_ptr  = bufa;
5415     }
5416   }
5417   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 /*@C
5422   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5423 
5424   Not Collective
5425 
5426   Input Parameters:
5427 . A - The matrix in mpiaij format
5428 
5429   Output Parameter:
5430 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5431 . colmap - A map from global column index to local index into lvec
5432 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5433 
5434   Level: developer
5435 
5436 @*/
5437 #if defined(PETSC_USE_CTABLE)
5438 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5439 #else
5440 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5441 #endif
5442 {
5443   Mat_MPIAIJ *a;
5444 
5445   PetscFunctionBegin;
5446   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5447   PetscValidPointer(lvec, 2);
5448   PetscValidPointer(colmap, 3);
5449   PetscValidPointer(multScatter, 4);
5450   a = (Mat_MPIAIJ*) A->data;
5451   if (lvec) *lvec = a->lvec;
5452   if (colmap) *colmap = a->colmap;
5453   if (multScatter) *multScatter = a->Mvctx;
5454   PetscFunctionReturn(0);
5455 }
5456 
5457 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5458 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5459 #if defined(PETSC_HAVE_MKL_SPARSE)
5460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5461 #endif
5462 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5463 #if defined(PETSC_HAVE_ELEMENTAL)
5464 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5465 #endif
5466 #if defined(PETSC_HAVE_HYPRE)
5467 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5468 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5469 #endif
5470 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5471 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5472 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5473 
5474 /*
5475     Computes (B'*A')' since computing B*A directly is untenable
5476 
5477                n                       p                          p
5478         (              )       (              )         (                  )
5479       m (      A       )  *  n (       B      )   =   m (         C        )
5480         (              )       (              )         (                  )
5481 
5482 */
5483 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5484 {
5485   PetscErrorCode ierr;
5486   Mat            At,Bt,Ct;
5487 
5488   PetscFunctionBegin;
5489   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5490   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5491   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5492   ierr = MatDestroy(&At);CHKERRQ(ierr);
5493   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5494   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5495   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5496   PetscFunctionReturn(0);
5497 }
5498 
5499 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5500 {
5501   PetscErrorCode ierr;
5502   PetscInt       m=A->rmap->n,n=B->cmap->n;
5503   Mat            Cmat;
5504 
5505   PetscFunctionBegin;
5506   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5507   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5508   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5509   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5510   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5511   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5512   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5513   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5514 
5515   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5516 
5517   *C = Cmat;
5518   PetscFunctionReturn(0);
5519 }
5520 
5521 /* ----------------------------------------------------------------*/
5522 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5523 {
5524   PetscErrorCode ierr;
5525 
5526   PetscFunctionBegin;
5527   if (scall == MAT_INITIAL_MATRIX) {
5528     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5529     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5530     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5531   }
5532   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5533   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5534   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5535   PetscFunctionReturn(0);
5536 }
5537 
5538 /*MC
5539    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5540 
5541    Options Database Keys:
5542 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5543 
5544   Level: beginner
5545 
5546 .seealso: MatCreateAIJ()
5547 M*/
5548 
5549 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5550 {
5551   Mat_MPIAIJ     *b;
5552   PetscErrorCode ierr;
5553   PetscMPIInt    size;
5554 
5555   PetscFunctionBegin;
5556   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5557 
5558   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5559   B->data       = (void*)b;
5560   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5561   B->assembled  = PETSC_FALSE;
5562   B->insertmode = NOT_SET_VALUES;
5563   b->size       = size;
5564 
5565   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5566 
5567   /* build cache for off array entries formed */
5568   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5569 
5570   b->donotstash  = PETSC_FALSE;
5571   b->colmap      = 0;
5572   b->garray      = 0;
5573   b->roworiented = PETSC_TRUE;
5574 
5575   /* stuff used for matrix vector multiply */
5576   b->lvec  = NULL;
5577   b->Mvctx = NULL;
5578 
5579   /* stuff for MatGetRow() */
5580   b->rowindices   = 0;
5581   b->rowvalues    = 0;
5582   b->getrowactive = PETSC_FALSE;
5583 
5584   /* flexible pointer used in CUSP/CUSPARSE classes */
5585   b->spptr = NULL;
5586 
5587   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5596 #if defined(PETSC_HAVE_MKL_SPARSE)
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5598 #endif
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5601 #if defined(PETSC_HAVE_ELEMENTAL)
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5603 #endif
5604 #if defined(PETSC_HAVE_HYPRE)
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5606 #endif
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5612 #if defined(PETSC_HAVE_HYPRE)
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5614 #endif
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5617   PetscFunctionReturn(0);
5618 }
5619 
5620 /*@C
5621      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5622          and "off-diagonal" part of the matrix in CSR format.
5623 
5624    Collective on MPI_Comm
5625 
5626    Input Parameters:
5627 +  comm - MPI communicator
5628 .  m - number of local rows (Cannot be PETSC_DECIDE)
5629 .  n - This value should be the same as the local size used in creating the
5630        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5631        calculated if N is given) For square matrices n is almost always m.
5632 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5633 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5634 .   i - row indices for "diagonal" portion of matrix
5635 .   j - column indices
5636 .   a - matrix values
5637 .   oi - row indices for "off-diagonal" portion of matrix
5638 .   oj - column indices
5639 -   oa - matrix values
5640 
5641    Output Parameter:
5642 .   mat - the matrix
5643 
5644    Level: advanced
5645 
5646    Notes:
5647        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5648        must free the arrays once the matrix has been destroyed and not before.
5649 
5650        The i and j indices are 0 based
5651 
5652        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5653 
5654        This sets local rows and cannot be used to set off-processor values.
5655 
5656        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5657        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5658        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5659        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5660        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5661        communication if it is known that only local entries will be set.
5662 
5663 .keywords: matrix, aij, compressed row, sparse, parallel
5664 
5665 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5666           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5667 @*/
5668 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5669 {
5670   PetscErrorCode ierr;
5671   Mat_MPIAIJ     *maij;
5672 
5673   PetscFunctionBegin;
5674   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5675   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5676   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5677   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5678   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5679   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5680   maij = (Mat_MPIAIJ*) (*mat)->data;
5681 
5682   (*mat)->preallocated = PETSC_TRUE;
5683 
5684   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5685   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5686 
5687   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5688   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5689 
5690   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694 
5695   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5696   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5699   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5700   PetscFunctionReturn(0);
5701 }
5702 
5703 /*
5704     Special version for direct calls from Fortran
5705 */
5706 #include <petsc/private/fortranimpl.h>
5707 
5708 /* Change these macros so can be used in void function */
5709 #undef CHKERRQ
5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5711 #undef SETERRQ2
5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5713 #undef SETERRQ3
5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5715 #undef SETERRQ
5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5717 
5718 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5719 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5721 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5722 #else
5723 #endif
5724 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5725 {
5726   Mat            mat  = *mmat;
5727   PetscInt       m    = *mm, n = *mn;
5728   InsertMode     addv = *maddv;
5729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5730   PetscScalar    value;
5731   PetscErrorCode ierr;
5732 
5733   MatCheckPreallocated(mat,1);
5734   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5735 
5736 #if defined(PETSC_USE_DEBUG)
5737   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5738 #endif
5739   {
5740     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5741     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5742     PetscBool roworiented = aij->roworiented;
5743 
5744     /* Some Variables required in the macro */
5745     Mat        A                 = aij->A;
5746     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5747     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5748     MatScalar  *aa               = a->a;
5749     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5750     Mat        B                 = aij->B;
5751     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5752     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5753     MatScalar  *ba               = b->a;
5754 
5755     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5756     PetscInt  nonew = a->nonew;
5757     MatScalar *ap1,*ap2;
5758 
5759     PetscFunctionBegin;
5760     for (i=0; i<m; i++) {
5761       if (im[i] < 0) continue;
5762 #if defined(PETSC_USE_DEBUG)
5763       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5764 #endif
5765       if (im[i] >= rstart && im[i] < rend) {
5766         row      = im[i] - rstart;
5767         lastcol1 = -1;
5768         rp1      = aj + ai[row];
5769         ap1      = aa + ai[row];
5770         rmax1    = aimax[row];
5771         nrow1    = ailen[row];
5772         low1     = 0;
5773         high1    = nrow1;
5774         lastcol2 = -1;
5775         rp2      = bj + bi[row];
5776         ap2      = ba + bi[row];
5777         rmax2    = bimax[row];
5778         nrow2    = bilen[row];
5779         low2     = 0;
5780         high2    = nrow2;
5781 
5782         for (j=0; j<n; j++) {
5783           if (roworiented) value = v[i*n+j];
5784           else value = v[i+j*m];
5785           if (in[j] >= cstart && in[j] < cend) {
5786             col = in[j] - cstart;
5787             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5788             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5789           } else if (in[j] < 0) continue;
5790 #if defined(PETSC_USE_DEBUG)
5791           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5792           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5793 #endif
5794           else {
5795             if (mat->was_assembled) {
5796               if (!aij->colmap) {
5797                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5798               }
5799 #if defined(PETSC_USE_CTABLE)
5800               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5801               col--;
5802 #else
5803               col = aij->colmap[in[j]] - 1;
5804 #endif
5805               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5806               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5807                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5808                 col  =  in[j];
5809                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5810                 B     = aij->B;
5811                 b     = (Mat_SeqAIJ*)B->data;
5812                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5813                 rp2   = bj + bi[row];
5814                 ap2   = ba + bi[row];
5815                 rmax2 = bimax[row];
5816                 nrow2 = bilen[row];
5817                 low2  = 0;
5818                 high2 = nrow2;
5819                 bm    = aij->B->rmap->n;
5820                 ba    = b->a;
5821               }
5822             } else col = in[j];
5823             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5824           }
5825         }
5826       } else if (!aij->donotstash) {
5827         if (roworiented) {
5828           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5829         } else {
5830           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5831         }
5832       }
5833     }
5834   }
5835   PetscFunctionReturnVoid();
5836 }
5837 
5838