xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9eaa4dc2a1dca61b2e6d66e2eb20f4e6146d5f84)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes
23    when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
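
/*
   A minimal usage sketch for MATAIJ (illustrative only: A, m, and n are placeholder variables, the fill
   estimates 5 and 2 are made up, and error-code declarations are abbreviated). Calling both preallocation
   routines, as recommended above, works for communicators of any size:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/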
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
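/*
   Returns in keptrows an index set, over the communicator of M, of the locally owned rows that contain at
   least one stored nonzero value; keptrows is left NULL when no process has an empty or all-zero row.
*/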
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
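/*
   Computes the requested norm of every global column: each process accumulates the contributions of its
   diagonal (A) and off-diagonal (B) blocks into a work array of length N, and the arrays are then combined
   with an MPI reduction (max for the infinity norm, sum otherwise).
*/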
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
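/*
   Communication pattern: process 0 holds the sequential matrix gmat and sends each other process its row
   lengths, then its column indices, then its numerical values; every process preallocates from the received
   row lengths (split into diagonal and off-diagonal counts) and inserts its rows with MatSetValues(). With
   MAT_REUSE_MATRIX only the numerical values are sent and copied directly into the A and B blocks.
*/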
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal entries in each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal entries in each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at a
405 slightly higher hash-table lookup cost; without it, it is not scalable (each process
406 holds an integer array of length N) but access is fast.
407 */
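/*
   For example, with garray = {3,7,12} the array form records colmap[3] = 1, colmap[7] = 2, colmap[12] = 3
   (local index plus one, so that 0 means the global column does not appear in the off-diagonal block);
   the PETSC_USE_CTABLE form stores the same information as (global column + 1, local index + 1) pairs.
*/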
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
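/*
   The two macros below insert (or add) a single value into the diagonal (A) or off-diagonal (B) SeqAIJ
   block from within MatSetValues_MPIAIJ(): a short binary search narrows the column range, a linear scan
   locates the insertion point, and if the column is not yet present the row is grown (subject to the
   nonew and ignorezeroentries options) and the later entries are shifted up to make room.
*/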
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
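/*
   Copies an entire locally owned row from v, whose entries must be ordered by global column: first the
   off-diagonal entries to the left of the diagonal block, then the diagonal-block entries, then the
   remaining off-diagonal entries to the right. Only square matrices are handled, as noted below.
*/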
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
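/*
   Entries for locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B) block with
   the macros above; once the matrix has been assembled, global columns are translated to local off-diagonal
   indices through the colmap, and a brand-new off-diagonal column triggers disassembly. Entries for rows
   owned by other processes are collected in the stash and communicated during MatAssemblyBegin()/End().
*/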
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 PetscErrorCode MatSetValues_MPIAIJ_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz)
636 {
637   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
638   Mat            A      = aij->A; /* diagonal part of the matrix */
639   Mat            B      = aij->B; /* offdiagonal part of the matrix */
640   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
641   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
642   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
643   PetscInt       *ailen = a->ilen,*aj = a->j;
644   PetscInt       *bilen = b->ilen,*bj = b->j;
645   PetscInt       am     = aij->A->rmap->n,i,j,k;
646   PetscInt       col, diag_so_far=0, offd_so_far=0;
647 
648   PetscFunctionBegin;
649   /* Iterate over all rows of the matrix */
650   for (j=0; j<am; j++) {
651     /* set the left off-diagonal */
652     for (i=0; i<onz[j]; i++) {
653       col = i + mat_i[j];
654       /* If column is in the diagonal... */
655       if (mat_j[col] >= cstart && mat_j[col] < cend) {
656         break;
657       }
658       bj[i+offd_so_far] = mat_j[col];
659     }
660     /* ... set the diagonal elements */
661     for (k=0; k<dnz[j]; k++) {
662       col = k + mat_i[j] + i;
663       aj[diag_so_far+k] = mat_j[col] - cstart;
664       ailen[j] = dnz[j];
665     }
666     diag_so_far += dnz[j];
667     /* Set the right off-diagonal */
668     for (i=i+dnz[j]; i<dnz[j]+onz[j]; i++) {
669       col = i + mat_i[j];
670       bj[i-dnz[j]+offd_so_far] = mat_j[col];
671     }
672     bilen[j] = onz[j];
673     offd_so_far += onz[j];
674   }
675   PetscFunctionReturn(0);
676 }
677 
678 
679 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
680 {
681   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
682   PetscErrorCode ierr;
683   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
684   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
685 
686   PetscFunctionBegin;
687   for (i=0; i<m; i++) {
688     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
689     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
690     if (idxm[i] >= rstart && idxm[i] < rend) {
691       row = idxm[i] - rstart;
692       for (j=0; j<n; j++) {
693         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
694         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
695         if (idxn[j] >= cstart && idxn[j] < cend) {
696           col  = idxn[j] - cstart;
697           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
698         } else {
699           if (!aij->colmap) {
700             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
701           }
702 #if defined(PETSC_USE_CTABLE)
703           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
704           col--;
705 #else
706           col = aij->colmap[idxn[j]] - 1;
707 #endif
708           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
709           else {
710             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
711           }
712         }
713       }
714     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
720 
721 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
722 {
723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
724   PetscErrorCode ierr;
725   PetscInt       nstash,reallocs;
726 
727   PetscFunctionBegin;
728   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
729 
730   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
731   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
732   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
733   PetscFunctionReturn(0);
734 }
735 
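/*
   Finishes assembly: drains the stash of entries received from other processes and inserts them with
   MatSetValues_MPIAIJ(), assembles the diagonal block, disassembles locally if any process had to (so the
   off-diagonal column map can be rebuilt consistently everywhere), sets up the multiply scatter on the
   first final assembly, and finally assembles the off-diagonal block.
*/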
736 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
737 {
738   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
739   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
740   PetscErrorCode ierr;
741   PetscMPIInt    n;
742   PetscInt       i,j,rstart,ncols,flg;
743   PetscInt       *row,*col;
744   PetscBool      other_disassembled;
745   PetscScalar    *val;
746 
747   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
748 
749   PetscFunctionBegin;
750   if (!aij->donotstash && !mat->nooffprocentries) {
751     while (1) {
752       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
753       if (!flg) break;
754 
755       for (i=0; i<n; ) {
756         /* Now identify the consecutive vals belonging to the same row */
757         for (j=i,rstart=row[j]; j<n; j++) {
758           if (row[j] != rstart) break;
759         }
760         if (j < n) ncols = j-i;
761         else       ncols = n-i;
762         /* Now assemble all these values with a single function call */
763         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
764 
765         i = j;
766       }
767     }
768     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
769   }
770   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
771   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
772 
773   /* determine if any processor has disassembled; if so we must
774      also disassemble ourselves, in order that we may reassemble. */
775   /*
776      if nonzero structure of submatrix B cannot change then we know that
777      no processor disassembled thus we can skip this stuff
778   */
779   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
780     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
781     if (mat->was_assembled && !other_disassembled) {
782       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
783     }
784   }
785   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
786     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
787   }
788   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
789   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
790   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
791 
792   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
793 
794   aij->rowvalues = 0;
795 
796   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
797   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
798 
799   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
800   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
801     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
802     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
808 {
809   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
810   PetscErrorCode ierr;
811 
812   PetscFunctionBegin;
813   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
814   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
815   PetscFunctionReturn(0);
816 }
817 
818 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
819 {
820   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
821   PetscInt      *lrows;
822   PetscInt       r, len;
823   PetscBool      cong;
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827   /* get locally owned rows */
828   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
829   /* fix right hand side if needed */
830   if (x && b) {
831     const PetscScalar *xx;
832     PetscScalar       *bb;
833 
834     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
835     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
836     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
837     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
838     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
839   }
840   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
841   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
842   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
843   if ((diag != 0.0) && cong) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
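/*
   Zeros both the rows and the columns listed in rows[]: a PetscSF maps the (possibly off-process) input rows
   to their owners, MatZeroRowsColumns() handles the diagonal block l->A, and a mask vector scattered to the
   ghost layout marks which columns of the off-diagonal block l->B must be zeroed, fixing up the right-hand
   side b from x when both are provided.
*/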
867 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
868 {
869   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       n = A->rmap->n;
872   PetscInt          i,j,r,m,p = 0,len = 0;
873   PetscInt          *lrows,*owners = A->rmap->range;
874   PetscSFNode       *rrows;
875   PetscSF           sf;
876   const PetscScalar *xx;
877   PetscScalar       *bb,*mask;
878   Vec               xmask,lmask;
879   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
880   const PetscInt    *aj, *ii,*ridx;
881   PetscScalar       *aa;
882 
883   PetscFunctionBegin;
884   /* Create SF where leaves are input rows and roots are owned rows */
885   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
886   for (r = 0; r < n; ++r) lrows[r] = -1;
887   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
888   for (r = 0; r < N; ++r) {
889     const PetscInt idx   = rows[r];
890     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
891     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
892       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
893     }
894     rrows[r].rank  = p;
895     rrows[r].index = rows[r] - owners[p];
896   }
897   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
898   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
899   /* Collect flags for rows to be zeroed */
900   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
901   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
902   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
903   /* Compress and put in row numbers */
904   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
905   /* zero diagonal part of matrix */
906   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
907   /* handle off diagonal part of matrix */
908   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
909   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
910   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
911   for (i=0; i<len; i++) bb[lrows[i]] = 1;
912   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
913   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
916   if (x) {
917     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
921   }
922   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
923   /* remove zeroed rows of off diagonal matrix */
924   ii = aij->i;
925   for (i=0; i<len; i++) {
926     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
927   }
928   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
929   if (aij->compressedrow.use) {
930     m    = aij->compressedrow.nrows;
931     ii   = aij->compressedrow.i;
932     ridx = aij->compressedrow.rindex;
933     for (i=0; i<m; i++) {
934       n  = ii[i+1] - ii[i];
935       aj = aij->j + ii[i];
936       aa = aij->a + ii[i];
937 
938       for (j=0; j<n; j++) {
939         if (PetscAbsScalar(mask[*aj])) {
940           if (b) bb[*ridx] -= *aa*xx[*aj];
941           *aa = 0.0;
942         }
943         aa++;
944         aj++;
945       }
946       ridx++;
947     }
948   } else { /* do not use compressed row format */
949     m = l->B->rmap->n;
950     for (i=0; i<m; i++) {
951       n  = ii[i+1] - ii[i];
952       aj = aij->j + ii[i];
953       aa = aij->a + ii[i];
954       for (j=0; j<n; j++) {
955         if (PetscAbsScalar(mask[*aj])) {
956           if (b) bb[i] -= *aa*xx[*aj];
957           *aa = 0.0;
958         }
959         aa++;
960         aj++;
961       }
962     }
963   }
964   if (x) {
965     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
966     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
967   }
968   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
969   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971 
972   /* only change matrix nonzero state if pattern was allowed to be changed */
973   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
974     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
975     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984   PetscInt       nt;
985   VecScatter     Mvctx = a->Mvctx;
986 
987   PetscFunctionBegin;
988   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
989   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
990 
991   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
993   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1009 {
1010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1011   PetscErrorCode ierr;
1012   VecScatter     Mvctx = a->Mvctx;
1013 
1014   PetscFunctionBegin;
1015   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1016   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1017   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1018   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1024 {
1025   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1026   PetscErrorCode ierr;
1027   PetscBool      merged;
1028 
1029   PetscFunctionBegin;
1030   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1031   /* do nondiagonal part */
1032   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1033   if (!merged) {
1034     /* send it on its way */
1035     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036     /* do local part */
1037     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1038     /* receive remote parts: note this assumes the values are not actually */
1039     /* added in yy until the next line */
1040     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1041   } else {
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* send it on its way */
1045     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046     /* values actually were received in the Begin() but we need to call this nop */
1047     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1048   }
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1053 {
1054   MPI_Comm       comm;
1055   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1056   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1057   IS             Me,Notme;
1058   PetscErrorCode ierr;
1059   PetscInt       M,N,first,last,*notme,i;
1060   PetscMPIInt    size;
1061 
1062   PetscFunctionBegin;
1063   /* Easy test: the diagonal blocks must be transposes of each other */
1064   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1065   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1066   if (!*f) PetscFunctionReturn(0);
1067   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1069   if (size == 1) PetscFunctionReturn(0);
1070 
1071   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1072   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1073   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1074   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1075   for (i=0; i<first; i++) notme[i] = i;
1076   for (i=last; i<M; i++) notme[i-last+first] = i;
1077   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1078   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1079   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1080   Aoff = Aoffs[0];
1081   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1082   Boff = Boffs[0];
1083   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1084   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1086   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1088   ierr = PetscFree(notme);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1093 {
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   /* do nondiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1109   /* send it on its way */
1110   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1111   /* do local part */
1112   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   /* receive remote parts */
1114   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119   This only works correctly for square matrices where the subblock A->A is the
1120    diagonal block
1121 */
1122 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1123 {
1124   PetscErrorCode ierr;
1125   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1126 
1127   PetscFunctionBegin;
1128   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1129   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1130   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1135 {
1136   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1137   PetscErrorCode ierr;
1138 
1139   PetscFunctionBegin;
1140   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1141   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1167   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1168   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1169   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1170 
1171   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1180 #if defined(PETSC_HAVE_ELEMENTAL)
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1182 #endif
1183 #if defined(PETSC_HAVE_HYPRE)
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1186 #endif
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
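/*
   Writes the matrix in PETSc binary format: process 0 writes the header, then gathers row lengths, global
   column indices, and numerical values from every other process (using viewer flow control to bound the
   number of outstanding messages) and writes them in global row order, interleaving each row's off-diagonal
   entries around its diagonal-block entries so the columns come out in increasing order.
*/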
1192 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1193 {
1194   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1195   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1196   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1197   PetscErrorCode ierr;
1198   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1199   int            fd;
1200   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1201   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1202   PetscScalar    *column_values;
1203   PetscInt       message_count,flowcontrolcount;
1204   FILE           *file;
1205 
1206   PetscFunctionBegin;
1207   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1208   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1209   nz   = A->nz + B->nz;
1210   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1211   if (!rank) {
1212     header[0] = MAT_FILE_CLASSID;
1213     header[1] = mat->rmap->N;
1214     header[2] = mat->cmap->N;
1215 
1216     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1217     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1218     /* get largest number of rows any processor has */
1219     rlen  = mat->rmap->n;
1220     range = mat->rmap->range;
1221     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1222   } else {
1223     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1224     rlen = mat->rmap->n;
1225   }
1226 
1227   /* load up the local row counts */
1228   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1229   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1230 
1231   /* store the row lengths to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     for (i=1; i<size; i++) {
1236       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1237       rlen = range[i+1] - range[i];
1238       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1240     }
1241     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1242   } else {
1243     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1244     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1246   }
1247   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1248 
1249   /* load up the local column indices */
1250   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
1251   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1253   cnt   = 0;
1254   for (i=0; i<mat->rmap->n; i++) {
1255     for (j=B->i[i]; j<B->i[i+1]; j++) {
1256       if ((col = garray[B->j[j]]) > cstart) break;
1257       column_indices[cnt++] = col;
1258     }
1259     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1260     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1261   }
1262   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1263 
1264   /* store the column indices to the file */
1265   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1266   if (!rank) {
1267     MPI_Status status;
1268     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1269     for (i=1; i<size; i++) {
1270       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1271       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1272       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1273       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1274       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1275     }
1276     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1277   } else {
1278     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1279     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1282   }
1283   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1284 
1285   /* load up the local column values */
1286   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1287   cnt  = 0;
1288   for (i=0; i<mat->rmap->n; i++) {
1289     for (j=B->i[i]; j<B->i[i+1]; j++) {
1290       if (garray[B->j[j]] > cstart) break;
1291       column_values[cnt++] = B->a[j];
1292     }
1293     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1294     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1295   }
1296   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1297 
1298   /* store the column values to the file */
1299   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1300   if (!rank) {
1301     MPI_Status status;
1302     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1303     for (i=1; i<size; i++) {
1304       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1305       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1306       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1307       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1308       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1309     }
1310     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1311   } else {
1312     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1313     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1316   }
1317   ierr = PetscFree(column_values);CHKERRQ(ierr);
1318 
1319   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1320   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1321   PetscFunctionReturn(0);
1322 }
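/*
   Sketch of the binary layout written by the routine above, inferred from its PetscBinaryWrite()
   calls (a reading aid, not an authoritative format specification):

     PetscInt    header[4]  = {MAT_FILE_CLASSID, M, N, global nz}
     PetscInt    rowlen[M]  = number of nonzeros in each global row
     PetscInt    cols[nz]   = global column indices, rows in order, columns sorted within a row
     PetscScalar vals[nz]   = values in the same ordering as cols[]

   Rank 0 performs all file I/O; the other ranks stream their pieces to it under the
   PetscViewerFlowControl protocol used above.
*/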
1323 
1324 #include <petscdraw.h>
1325 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1326 {
1327   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1328   PetscErrorCode    ierr;
1329   PetscMPIInt       rank = aij->rank,size = aij->size;
1330   PetscBool         isdraw,iascii,isbinary;
1331   PetscViewer       sviewer;
1332   PetscViewerFormat format;
1333 
1334   PetscFunctionBegin;
1335   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1336   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1337   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1338   if (iascii) {
1339     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1340     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1341       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1342       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1343       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344       for (i=0; i<(PetscInt)size; i++) {
1345         nmax = PetscMax(nmax,nz[i]);
1346         nmin = PetscMin(nmin,nz[i]);
1347         navg += nz[i];
1348       }
1349       ierr = PetscFree(nz);CHKERRQ(ierr);
1350       navg = navg/size;
1351       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     }
1354     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1355     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1356       MatInfo   info;
1357       PetscBool inodes;
1358 
1359       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1360       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1361       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1363       if (!inodes) {
1364         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1365                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1368                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1369       }
1370       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1371       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1372       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1374       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1375       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1376       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1377       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1378       PetscFunctionReturn(0);
1379     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1380       PetscInt inodecount,inodelimit,*inodes;
1381       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1382       if (inodes) {
1383         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1384       } else {
1385         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1386       }
1387       PetscFunctionReturn(0);
1388     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1389       PetscFunctionReturn(0);
1390     }
1391   } else if (isbinary) {
1392     if (size == 1) {
1393       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1394       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1395     } else {
1396       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1397     }
1398     PetscFunctionReturn(0);
1399   } else if (isdraw) {
1400     PetscDraw draw;
1401     PetscBool isnull;
1402     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1403     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1404     if (isnull) PetscFunctionReturn(0);
1405   }
1406 
1407   {
1408     /* assemble the entire matrix onto the first processor */
1409     Mat        A;
1410     Mat_SeqAIJ *Aloc;
1411     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1412     MatScalar  *a;
1413 
1414     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1415     if (!rank) {
1416       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1417     } else {
1418       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1419     }
1420     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1421     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1422     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1423     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1424     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1425 
1426     /* copy over the A part */
1427     Aloc = (Mat_SeqAIJ*)aij->A->data;
1428     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1429     row  = mat->rmap->rstart;
1430     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1431     for (i=0; i<m; i++) {
1432       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1433       row++;
1434       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1435     }
1436     aj = Aloc->j;
1437     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1438 
1439     /* copy over the B part */
1440     Aloc = (Mat_SeqAIJ*)aij->B->data;
1441     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1442     row  = mat->rmap->rstart;
1443     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1444     ct   = cols;
1445     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1446     for (i=0; i<m; i++) {
1447       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1448       row++;
1449       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1450     }
1451     ierr = PetscFree(ct);CHKERRQ(ierr);
1452     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1453     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454     /*
1455        Everyone has to call to draw the matrix since the graphics waits are
1456        synchronized across all processors that share the PetscDraw object
1457     */
1458     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1459     if (!rank) {
1460       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1461       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1462     }
1463     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1464     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1465     ierr = MatDestroy(&A);CHKERRQ(ierr);
1466   }
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1471 {
1472   PetscErrorCode ierr;
1473   PetscBool      iascii,isdraw,issocket,isbinary;
1474 
1475   PetscFunctionBegin;
1476   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1477   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1478   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1479   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1480   if (iascii || isdraw || isbinary || issocket) {
1481     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
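/*
   A minimal usage sketch (assuming a matrix A of type MATMPIAIJ that has already been assembled):

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   MatView() dispatches to MatView_MPIAIJ() above whenever the viewer is of type
   ASCII, draw, socket, or binary.
*/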
1485 
1486 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1487 {
1488   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1489   PetscErrorCode ierr;
1490   Vec            bb1 = 0;
1491   PetscBool      hasop;
1492 
1493   PetscFunctionBegin;
1494   if (flag == SOR_APPLY_UPPER) {
1495     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1496     PetscFunctionReturn(0);
1497   }
1498 
1499   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1500     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1501   }
1502 
1503   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1504     if (flag & SOR_ZERO_INITIAL_GUESS) {
1505       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1506       its--;
1507     }
1508 
1509     while (its--) {
1510       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512 
1513       /* update rhs: bb1 = bb - B*x */
1514       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1515       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1516 
1517       /* local sweep */
1518       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1519     }
1520   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1521     if (flag & SOR_ZERO_INITIAL_GUESS) {
1522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1523       its--;
1524     }
1525     while (its--) {
1526       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1527       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528 
1529       /* update rhs: bb1 = bb - B*x */
1530       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1531       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1532 
1533       /* local sweep */
1534       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1535     }
1536   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1537     if (flag & SOR_ZERO_INITIAL_GUESS) {
1538       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1539       its--;
1540     }
1541     while (its--) {
1542       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1543       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544 
1545       /* update rhs: bb1 = bb - B*x */
1546       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1547       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1548 
1549       /* local sweep */
1550       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1551     }
1552   } else if (flag & SOR_EISENSTAT) {
1553     Vec xx1;
1554 
1555     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1556     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1557 
1558     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1559     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1560     if (!mat->diag) {
1561       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1562       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1563     }
1564     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1565     if (hasop) {
1566       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1567     } else {
1568       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1569     }
1570     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1571 
1572     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1573 
1574     /* local sweep */
1575     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1576     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1577     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1578   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1579 
1580   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1581 
1582   matin->factorerrortype = mat->A->factorerrortype;
1583   PetscFunctionReturn(0);
1584 }
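/*
   Sketch of the parallel iteration performed by the local sweeps above (per process):

     bb1 = bb - B*x_ghost        off-diagonal contribution moved to the right-hand side
     x   = SOR(A, bb1)           omega-weighted sweep restricted to the diagonal block A

   i.e. the global method is processor-block Jacobi with (S)SOR as the block solver, which is
   why the truly parallel SOR flags fall through to the "Parallel SOR not supported" error.
*/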
1585 
1586 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1587 {
1588   Mat            aA,aB,Aperm;
1589   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1590   PetscScalar    *aa,*ba;
1591   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1592   PetscSF        rowsf,sf;
1593   IS             parcolp = NULL;
1594   PetscBool      done;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1599   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1600   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1601   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1602 
1603   /* Invert row permutation to find out where my rows should go */
1604   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1605   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1606   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1607   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1608   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1610 
1611   /* Invert column permutation to find out where my columns should go */
1612   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1613   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1614   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1615   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1616   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1617   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1618   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1619 
1620   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1621   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1622   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1623 
1624   /* Find out where my gcols should go */
1625   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1626   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1627   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1628   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1629   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1630   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1631   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1632   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1633 
1634   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1635   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1636   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) {
1638     PetscInt row = rdest[i],rowner;
1639     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1640     for (j=ai[i]; j<ai[i+1]; j++) {
1641       PetscInt cowner,col = cdest[aj[j]];
1642       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1643       if (rowner == cowner) dnnz[i]++;
1644       else onnz[i]++;
1645     }
1646     for (j=bi[i]; j<bi[i+1]; j++) {
1647       PetscInt cowner,col = gcdest[bj[j]];
1648       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1649       if (rowner == cowner) dnnz[i]++;
1650       else onnz[i]++;
1651     }
1652   }
1653   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1654   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1655   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1656   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1657   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1658 
1659   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1660   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1661   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) {
1663     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1664     PetscInt j0,rowlen;
1665     rowlen = ai[i+1] - ai[i];
1666     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than m (the length of the scratch arrays), so insert in batches */
1667       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1668       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1669     }
1670     rowlen = bi[i+1] - bi[i];
1671     for (j0=j=0; j<rowlen; j0=j) {
1672       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1673       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1674     }
1675   }
1676   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1677   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1679   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1680   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1681   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1682   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1683   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1684   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1685   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1686   *B = Aperm;
1687   PetscFunctionReturn(0);
1688 }
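/*
   A minimal usage sketch (rowp and colp are index sets giving the desired global orderings):

     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   which returns A with its rows and columns reordered according to rowp and colp. The SF-based
   inversion of the permutations above lets each process discover where its rows, columns, and
   ghost columns land without gathering the full permutation on any single process.
*/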
1689 
1690 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1691 {
1692   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1697   if (ghosts) *ghosts = aij->garray;
1698   PetscFunctionReturn(0);
1699 }
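/*
   A minimal usage sketch (names assumed for illustration):

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);

   The returned array is aij->garray, i.e. the global column indices of the off-diagonal (B)
   part, so it must not be freed or modified by the caller.
*/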
1700 
1701 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1702 {
1703   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1704   Mat            A    = mat->A,B = mat->B;
1705   PetscErrorCode ierr;
1706   PetscReal      isend[5],irecv[5];
1707 
1708   PetscFunctionBegin;
1709   info->block_size = 1.0;
1710   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1711 
1712   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1713   isend[3] = info->memory;  isend[4] = info->mallocs;
1714 
1715   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1716 
1717   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1718   isend[3] += info->memory;  isend[4] += info->mallocs;
1719   if (flag == MAT_LOCAL) {
1720     info->nz_used      = isend[0];
1721     info->nz_allocated = isend[1];
1722     info->nz_unneeded  = isend[2];
1723     info->memory       = isend[3];
1724     info->mallocs      = isend[4];
1725   } else if (flag == MAT_GLOBAL_MAX) {
1726     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1727 
1728     info->nz_used      = irecv[0];
1729     info->nz_allocated = irecv[1];
1730     info->nz_unneeded  = irecv[2];
1731     info->memory       = irecv[3];
1732     info->mallocs      = irecv[4];
1733   } else if (flag == MAT_GLOBAL_SUM) {
1734     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1735 
1736     info->nz_used      = irecv[0];
1737     info->nz_allocated = irecv[1];
1738     info->nz_unneeded  = irecv[2];
1739     info->memory       = irecv[3];
1740     info->mallocs      = irecv[4];
1741   }
1742   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1743   info->fill_ratio_needed = 0;
1744   info->factor_mallocs    = 0;
1745   PetscFunctionReturn(0);
1746 }
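/*
   A minimal usage sketch (assuming an assembled MATMPIAIJ matrix A):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL returns the per-process totals of the A and B parts computed above, while
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce those local numbers over the communicator.
*/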
1747 
1748 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1749 {
1750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   switch (op) {
1755   case MAT_NEW_NONZERO_LOCATIONS:
1756   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1757   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1758   case MAT_KEEP_NONZERO_PATTERN:
1759   case MAT_NEW_NONZERO_LOCATION_ERR:
1760   case MAT_USE_INODES:
1761   case MAT_IGNORE_ZERO_ENTRIES:
1762     MatCheckPreallocated(A,1);
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_ROW_ORIENTED:
1767     MatCheckPreallocated(A,1);
1768     a->roworiented = flg;
1769 
1770     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1771     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_NEW_DIAGONALS:
1774     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1775     break;
1776   case MAT_IGNORE_OFF_PROC_ENTRIES:
1777     a->donotstash = flg;
1778     break;
1779   case MAT_SPD:
1780     A->spd_set = PETSC_TRUE;
1781     A->spd     = flg;
1782     if (flg) {
1783       A->symmetric                  = PETSC_TRUE;
1784       A->structurally_symmetric     = PETSC_TRUE;
1785       A->symmetric_set              = PETSC_TRUE;
1786       A->structurally_symmetric_set = PETSC_TRUE;
1787     }
1788     break;
1789   case MAT_SYMMETRIC:
1790     MatCheckPreallocated(A,1);
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_STRUCTURALLY_SYMMETRIC:
1794     MatCheckPreallocated(A,1);
1795     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1796     break;
1797   case MAT_HERMITIAN:
1798     MatCheckPreallocated(A,1);
1799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1800     break;
1801   case MAT_SYMMETRY_ETERNAL:
1802     MatCheckPreallocated(A,1);
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     break;
1805   case MAT_SUBMAT_SINGLEIS:
1806     A->submat_singleis = flg;
1807     break;
1808   case MAT_STRUCTURE_ONLY:
1809     /* The option is handled directly by MatSetOption() */
1810     break;
1811   default:
1812     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1813   }
1814   PetscFunctionReturn(0);
1815 }
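/*
   A minimal usage sketch (assuming A is a preallocated MATMPIAIJ matrix):

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);

   Most options are simply forwarded to the diagonal (a->A) and off-diagonal (a->B) sequential
   blocks; MAT_SPD additionally marks the parallel matrix as symmetric, as done above.
*/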
1816 
1817 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1818 {
1819   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1820   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1821   PetscErrorCode ierr;
1822   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1823   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1824   PetscInt       *cmap,*idx_p;
1825 
1826   PetscFunctionBegin;
1827   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1828   mat->getrowactive = PETSC_TRUE;
1829 
1830   if (!mat->rowvalues && (idx || v)) {
1831     /*
1832         allocate enough space to hold information from the longest row.
1833     */
1834     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1835     PetscInt   max = 1,tmp;
1836     for (i=0; i<matin->rmap->n; i++) {
1837       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1838       if (max < tmp) max = tmp;
1839     }
1840     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1841   }
1842 
1843   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1844   lrow = row - rstart;
1845 
1846   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1847   if (!v)   {pvA = 0; pvB = 0;}
1848   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1849   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1850   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1851   nztot = nzA + nzB;
1852 
1853   cmap = mat->garray;
1854   if (v  || idx) {
1855     if (nztot) {
1856       /* Sort by increasing column numbers, assuming A and B already sorted */
1857       PetscInt imark = -1;
1858       if (v) {
1859         *v = v_p = mat->rowvalues;
1860         for (i=0; i<nzB; i++) {
1861           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1862           else break;
1863         }
1864         imark = i;
1865         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1866         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1867       }
1868       if (idx) {
1869         *idx = idx_p = mat->rowindices;
1870         if (imark > -1) {
1871           for (i=0; i<imark; i++) {
1872             idx_p[i] = cmap[cworkB[i]];
1873           }
1874         } else {
1875           for (i=0; i<nzB; i++) {
1876             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1877             else break;
1878           }
1879           imark = i;
1880         }
1881         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1882         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1883       }
1884     } else {
1885       if (idx) *idx = 0;
1886       if (v)   *v   = 0;
1887     }
1888   }
1889   *nz  = nztot;
1890   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1891   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
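/*
   A minimal usage sketch (row must be owned by this process, i.e. lie in [rstart,rend)):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);

   The returned column indices are global and sorted, merged from the B (off-diagonal) and
   A (diagonal) parts as done above; only one row may be active at a time.
*/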
1894 
1895 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1896 {
1897   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1898 
1899   PetscFunctionBegin;
1900   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1901   aij->getrowactive = PETSC_FALSE;
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1906 {
1907   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1908   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1909   PetscErrorCode ierr;
1910   PetscInt       i,j,cstart = mat->cmap->rstart;
1911   PetscReal      sum = 0.0;
1912   MatScalar      *v;
1913 
1914   PetscFunctionBegin;
1915   if (aij->size == 1) {
1916     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1917   } else {
1918     if (type == NORM_FROBENIUS) {
1919       v = amat->a;
1920       for (i=0; i<amat->nz; i++) {
1921         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1922       }
1923       v = bmat->a;
1924       for (i=0; i<bmat->nz; i++) {
1925         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1926       }
1927       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1928       *norm = PetscSqrtReal(*norm);
1929       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1930     } else if (type == NORM_1) { /* max column norm */
1931       PetscReal *tmp,*tmp2;
1932       PetscInt  *jj,*garray = aij->garray;
1933       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1934       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1935       *norm = 0.0;
1936       v     = amat->a; jj = amat->j;
1937       for (j=0; j<amat->nz; j++) {
1938         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1939       }
1940       v = bmat->a; jj = bmat->j;
1941       for (j=0; j<bmat->nz; j++) {
1942         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1943       }
1944       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1945       for (j=0; j<mat->cmap->N; j++) {
1946         if (tmp2[j] > *norm) *norm = tmp2[j];
1947       }
1948       ierr = PetscFree(tmp);CHKERRQ(ierr);
1949       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1950       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1951     } else if (type == NORM_INFINITY) { /* max row norm */
1952       PetscReal ntemp = 0.0;
1953       for (j=0; j<aij->A->rmap->n; j++) {
1954         v   = amat->a + amat->i[j];
1955         sum = 0.0;
1956         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1957           sum += PetscAbsScalar(*v); v++;
1958         }
1959         v = bmat->a + bmat->i[j];
1960         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1961           sum += PetscAbsScalar(*v); v++;
1962         }
1963         if (sum > ntemp) ntemp = sum;
1964       }
1965       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1966       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1967     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1968   }
1969   PetscFunctionReturn(0);
1970 }
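/*
   The three norms supported above are, for a matrix with entries a_ij:

     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )
     NORM_1:          max_j sum_i |a_ij|   (largest column sum, reduced over all ranks)
     NORM_INFINITY:   max_i sum_j |a_ij|   (largest row sum, reduced over all ranks)

   NORM_2 would require an eigenvalue/singular value computation and is therefore not supported here.
*/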
1971 
1972 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1973 {
1974   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1975   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1976   PetscErrorCode ierr;
1977   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1978   PetscInt       cstart = A->cmap->rstart,ncol;
1979   Mat            B;
1980   MatScalar      *array;
1981 
1982   PetscFunctionBegin;
1983   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1984   ai = Aloc->i; aj = Aloc->j;
1985   bi = Bloc->i; bj = Bloc->j;
1986   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1987     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1988     PetscSFNode          *oloc;
1989     PETSC_UNUSED PetscSF sf;
1990 
1991     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1992     /* compute d_nnz for preallocation */
1993     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<ai[ma]; i++) {
1995       d_nnz[aj[i]]++;
1996       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1997     }
1998     /* compute local off-diagonal contributions */
1999     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2000     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2001     /* map those to global */
2002     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2003     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2004     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2005     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2006     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2007     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2008     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2009 
2010     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2011     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2012     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2013     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2014     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2015     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2016   } else {
2017     B    = *matout;
2018     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2019     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2020   }
2021 
2022   /* copy over the A part */
2023   array = Aloc->a;
2024   row   = A->rmap->rstart;
2025   for (i=0; i<ma; i++) {
2026     ncol = ai[i+1]-ai[i];
2027     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2028     row++;
2029     array += ncol; aj += ncol;
2030   }
2031   aj = Aloc->j;
2032   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2033 
2034   /* copy over the B part */
2035   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2036   array = Bloc->a;
2037   row   = A->rmap->rstart;
2038   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2039   cols_tmp = cols;
2040   for (i=0; i<mb; i++) {
2041     ncol = bi[i+1]-bi[i];
2042     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2043     row++;
2044     array += ncol; cols_tmp += ncol;
2045   }
2046   ierr = PetscFree(cols);CHKERRQ(ierr);
2047 
2048   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2049   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2051     *matout = B;
2052   } else {
2053     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
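/*
   A minimal usage sketch:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   With MAT_INPLACE_MATRIX the transposed matrix replaces A itself via the MatHeaderMerge()
   call at the end of the routine above.
*/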
2057 
2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2059 {
2060   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2061   Mat            a    = aij->A,b = aij->B;
2062   PetscErrorCode ierr;
2063   PetscInt       s1,s2,s3;
2064 
2065   PetscFunctionBegin;
2066   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2067   if (rr) {
2068     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2069     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2070     /* Overlap communication with computation. */
2071     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2072   }
2073   if (ll) {
2074     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2075     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2076     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2077   }
2078   /* scale the diagonal block */
2079   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2080 
2081   if (rr) {
2082     /* Do a scatter end and then right scale the off-diagonal block */
2083     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2084     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2085   }
2086   PetscFunctionReturn(0);
2087 }
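/*
   The scaling applied above is, with ll and rr interpreted as diagonal matrices,

     mat <- diag(ll) * mat * diag(rr)

   The diagonal block is scaled from both sides locally, while the off-diagonal block needs
   the scatter of rr into aij->lvec before its columns can be scaled.
*/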
2088 
2089 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2090 {
2091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2092   PetscErrorCode ierr;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2123 
2124   PetscFunctionBegin;
2125   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2126   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2127     /* Because of the column compression in the off-process part of the matrix a->B,
2128        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2129        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2130        could be provided by first uncompressing the a->B matrices and then copying the
2131        submatrices */
2132     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2133   } else {
2134     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2135     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2136   }
2137   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2142 {
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    Computes the number of nonzeros per row needed for preallocation when X and Y
2152    have different nonzero structure.
2153 */
2154 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2155 {
2156   PetscInt       i,j,k,nzx,nzy;
2157 
2158   PetscFunctionBegin;
2159   /* Set the number of nonzeros in the new matrix */
2160   for (i=0; i<m; i++) {
2161     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2162     nzx = xi[i+1] - xi[i];
2163     nzy = yi[i+1] - yi[i];
2164     nnz[i] = 0;
2165     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2166       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2167       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2168       nnz[i]++;
2169     }
2170     for (; k<nzy; k++) nnz[i]++;
2171   }
2172   PetscFunctionReturn(0);
2173 }
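/*
   Small worked example of the merge above (one row, global column indices):

     X row: {1,4}   Y row: {2,4,7}   ->   union {1,2,4,7}   ->   nnz[i] = 4

   Columns present in both X and Y (here column 4) are counted once, which is exactly the
   amount of space MatAXPY_MPIAIJ() needs when the nonzero patterns differ.
*/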
2174 
2175 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2176 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2177 {
2178   PetscErrorCode ierr;
2179   PetscInt       m = Y->rmap->N;
2180   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2181   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2189 {
2190   PetscErrorCode ierr;
2191   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2192   PetscBLASInt   bnz,one=1;
2193   Mat_SeqAIJ     *x,*y;
2194 
2195   PetscFunctionBegin;
2196   if (str == SAME_NONZERO_PATTERN) {
2197     PetscScalar alpha = a;
2198     x    = (Mat_SeqAIJ*)xx->A->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     y    = (Mat_SeqAIJ*)yy->A->data;
2201     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2202     x    = (Mat_SeqAIJ*)xx->B->data;
2203     y    = (Mat_SeqAIJ*)yy->B->data;
2204     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2205     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2206     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2207   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2208     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2209   } else {
2210     Mat      B;
2211     PetscInt *nnz_d,*nnz_o;
2212     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2213     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2214     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2215     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2216     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2217     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2218     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2219     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2220     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2221     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2222     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2223     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2224     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2225     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2226   }
2227   PetscFunctionReturn(0);
2228 }
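/*
   A minimal usage sketch (assuming conforming MATMPIAIJ matrices X and Y and a scalar alpha):

     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   which computes Y = Y + alpha*X

   With SAME_NONZERO_PATTERN the update reduces to the two BLAS axpy calls above; with
   DIFFERENT_NONZERO_PATTERN a matrix with the merged pattern is preallocated and swapped
   into Y via MatHeaderReplace().
*/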
2229 
2230 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2231 
2232 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2233 {
2234 #if defined(PETSC_USE_COMPLEX)
2235   PetscErrorCode ierr;
2236   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2237 
2238   PetscFunctionBegin;
2239   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2240   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2241 #else
2242   PetscFunctionBegin;
2243 #endif
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2248 {
2249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2250   PetscErrorCode ierr;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2254   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2259 {
2260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2265   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2270 {
2271   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2272   PetscErrorCode ierr;
2273   PetscInt       i,*idxb = 0;
2274   PetscScalar    *va,*vb;
2275   Vec            vtmp;
2276 
2277   PetscFunctionBegin;
2278   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2279   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2280   if (idx) {
2281     for (i=0; i<A->rmap->n; i++) {
2282       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2283     }
2284   }
2285 
2286   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2287   if (idx) {
2288     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2289   }
2290   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2291   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2292 
2293   for (i=0; i<A->rmap->n; i++) {
2294     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2295       va[i] = vb[i];
2296       if (idx) idx[i] = a->garray[idxb[i]];
2297     }
2298   }
2299 
2300   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2301   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2302   ierr = PetscFree(idxb);CHKERRQ(ierr);
2303   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2308 {
2309   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2310   PetscErrorCode ierr;
2311   PetscInt       i,*idxb = 0;
2312   PetscScalar    *va,*vb;
2313   Vec            vtmp;
2314 
2315   PetscFunctionBegin;
2316   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2317   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2318   if (idx) {
2319     for (i=0; i<A->rmap->n; i++) {
2320       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2321     }
2322   }
2323 
2324   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2325   if (idx) {
2326     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2327   }
2328   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2329   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2330 
2331   for (i=0; i<A->rmap->n; i++) {
2332     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2333       va[i] = vb[i];
2334       if (idx) idx[i] = a->garray[idxb[i]];
2335     }
2336   }
2337 
2338   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2339   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2340   ierr = PetscFree(idxb);CHKERRQ(ierr);
2341   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2348   PetscInt       n      = A->rmap->n;
2349   PetscInt       cstart = A->cmap->rstart;
2350   PetscInt       *cmap  = mat->garray;
2351   PetscInt       *diagIdx, *offdiagIdx;
2352   Vec            diagV, offdiagV;
2353   PetscScalar    *a, *diagA, *offdiagA;
2354   PetscInt       r;
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2361   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2364   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2365   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2366   for (r = 0; r < n; ++r) {
2367     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2368       a[r]   = diagA[r];
2369       idx[r] = cstart + diagIdx[r];
2370     } else {
2371       a[r]   = offdiagA[r];
2372       idx[r] = cmap[offdiagIdx[r]];
2373     }
2374   }
2375   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2378   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2379   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2380   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2385 {
2386   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2387   PetscInt       n      = A->rmap->n;
2388   PetscInt       cstart = A->cmap->rstart;
2389   PetscInt       *cmap  = mat->garray;
2390   PetscInt       *diagIdx, *offdiagIdx;
2391   Vec            diagV, offdiagV;
2392   PetscScalar    *a, *diagA, *offdiagA;
2393   PetscInt       r;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2398   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2399   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2400   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2401   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2402   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2403   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2404   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2405   for (r = 0; r < n; ++r) {
2406     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2407       a[r]   = diagA[r];
2408       idx[r] = cstart + diagIdx[r];
2409     } else {
2410       a[r]   = offdiagA[r];
2411       idx[r] = cmap[offdiagIdx[r]];
2412     }
2413   }
2414   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2416   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2417   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2418   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2419   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2424 {
2425   PetscErrorCode ierr;
2426   Mat            *dummy;
2427 
2428   PetscFunctionBegin;
2429   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2430   *newmat = *dummy;
2431   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2436 {
2437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2438   PetscErrorCode ierr;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2442   A->factorerrortype = a->A->factorerrortype;
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2447 {
2448   PetscErrorCode ierr;
2449   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2450 
2451   PetscFunctionBegin;
2452   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2453   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2454   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2455   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2460 {
2461   PetscFunctionBegin;
2462   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2463   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 /*@
2468    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2469 
2470    Collective on Mat
2471 
2472    Input Parameters:
2473 +    A - the matrix
2474 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2475 
2476  Level: advanced
2477 
2478 @*/
2479 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2480 {
2481   PetscErrorCode       ierr;
2482 
2483   PetscFunctionBegin;
2484   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
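/*
   A minimal usage sketch; the same switch is also available on the command line via the
   -mat_increase_overlap_scalable option registered in MatSetFromOptions_MPIAIJ() below:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
*/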
2487 
2488 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2489 {
2490   PetscErrorCode       ierr;
2491   PetscBool            sc = PETSC_FALSE,flg;
2492 
2493   PetscFunctionBegin;
2494   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2496   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2497   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2498   if (flg) {
2499     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2500   }
2501   ierr = PetscOptionsTail();CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2506 {
2507   PetscErrorCode ierr;
2508   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2509   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2510 
2511   PetscFunctionBegin;
2512   if (!Y->preallocated) {
2513     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2514   } else if (!aij->nz) {
2515     PetscInt nonew = aij->nonew;
2516     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2517     aij->nonew = nonew;
2518   }
2519   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
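/*
   MatShift(Y,a) computes Y <- Y + a*I. A minimal usage sketch (shift is an assumed scalar name):

     ierr = MatShift(Y,shift);CHKERRQ(ierr);

   The preallocation fix-ups above only ensure that a diagonal entry can be inserted when Y
   has not been preallocated or currently stores no nonzeros.
*/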
2522 
2523 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2524 {
2525   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2526   PetscErrorCode ierr;
2527 
2528   PetscFunctionBegin;
2529   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2530   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2531   if (d) {
2532     PetscInt rstart;
2533     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2534     *d += rstart;
2535 
2536   }
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 
2541 /* -------------------------------------------------------------------*/
2542 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2543                                        MatGetRow_MPIAIJ,
2544                                        MatRestoreRow_MPIAIJ,
2545                                        MatMult_MPIAIJ,
2546                                 /* 4*/ MatMultAdd_MPIAIJ,
2547                                        MatMultTranspose_MPIAIJ,
2548                                        MatMultTransposeAdd_MPIAIJ,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                 /*10*/ 0,
2553                                        0,
2554                                        0,
2555                                        MatSOR_MPIAIJ,
2556                                        MatTranspose_MPIAIJ,
2557                                 /*15*/ MatGetInfo_MPIAIJ,
2558                                        MatEqual_MPIAIJ,
2559                                        MatGetDiagonal_MPIAIJ,
2560                                        MatDiagonalScale_MPIAIJ,
2561                                        MatNorm_MPIAIJ,
2562                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2563                                        MatAssemblyEnd_MPIAIJ,
2564                                        MatSetOption_MPIAIJ,
2565                                        MatZeroEntries_MPIAIJ,
2566                                 /*24*/ MatZeroRows_MPIAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                 /*29*/ MatSetUp_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                        MatGetDiagonalBlock_MPIAIJ,
2575                                        0,
2576                                 /*34*/ MatDuplicate_MPIAIJ,
2577                                        0,
2578                                        0,
2579                                        0,
2580                                        0,
2581                                 /*39*/ MatAXPY_MPIAIJ,
2582                                        MatCreateSubMatrices_MPIAIJ,
2583                                        MatIncreaseOverlap_MPIAIJ,
2584                                        MatGetValues_MPIAIJ,
2585                                        MatCopy_MPIAIJ,
2586                                 /*44*/ MatGetRowMax_MPIAIJ,
2587                                        MatScale_MPIAIJ,
2588                                        MatShift_MPIAIJ,
2589                                        MatDiagonalSet_MPIAIJ,
2590                                        MatZeroRowsColumns_MPIAIJ,
2591                                 /*49*/ MatSetRandom_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2597                                        0,
2598                                        MatSetUnfactored_MPIAIJ,
2599                                        MatPermute_MPIAIJ,
2600                                        0,
2601                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2602                                        MatDestroy_MPIAIJ,
2603                                        MatView_MPIAIJ,
2604                                        0,
2605                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2606                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2607                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2612                                        MatGetRowMinAbs_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                 /*75*/ MatFDColoringApply_AIJ,
2618                                        MatSetFromOptions_MPIAIJ,
2619                                        0,
2620                                        0,
2621                                        MatFindZeroDiagonals_MPIAIJ,
2622                                 /*80*/ 0,
2623                                        0,
2624                                        0,
2625                                 /*83*/ MatLoad_MPIAIJ,
2626                                        MatIsSymmetric_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2632                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2633                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2634                                        MatPtAP_MPIAIJ_MPIAIJ,
2635                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2636                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*99*/ 0,
2642                                        0,
2643                                        0,
2644                                        MatConjugate_MPIAIJ,
2645                                        0,
2646                                 /*104*/MatSetValuesRow_MPIAIJ,
2647                                        MatRealPart_MPIAIJ,
2648                                        MatImaginaryPart_MPIAIJ,
2649                                        0,
2650                                        0,
2651                                 /*109*/0,
2652                                        0,
2653                                        MatGetRowMin_MPIAIJ,
2654                                        0,
2655                                        MatMissingDiagonal_MPIAIJ,
2656                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2657                                        0,
2658                                        MatGetGhosts_MPIAIJ,
2659                                        0,
2660                                        0,
2661                                 /*119*/0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        MatGetMultiProcBlock_MPIAIJ,
2666                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2667                                        MatGetColumnNorms_MPIAIJ,
2668                                        MatInvertBlockDiagonal_MPIAIJ,
2669                                        0,
2670                                        MatCreateSubMatricesMPI_MPIAIJ,
2671                                 /*129*/0,
2672                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2673                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2674                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2675                                        0,
2676                                 /*134*/0,
2677                                        0,
2678                                        MatRARt_MPIAIJ_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                 /*139*/MatSetBlockSizes_MPIAIJ,
2682                                        0,
2683                                        0,
2684                                        MatFDColoringSetUp_MPIXAIJ,
2685                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2686                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2687 };
2688 
2689 /* ----------------------------------------------------------------------------------------*/
2690 
2691 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2692 {
2693   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2694   PetscErrorCode ierr;
2695 
2696   PetscFunctionBegin;
2697   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2698   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2703 {
2704   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2709   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2714 {
2715   Mat_MPIAIJ     *b;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2720   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2721   b = (Mat_MPIAIJ*)B->data;
2722 
2723 #if defined(PETSC_USE_CTABLE)
2724   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2725 #else
2726   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2727 #endif
2728   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2729   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2730   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2731 
2732   /* Because the off-diagonal matrix B may have been resized, we simply destroy it and create a new one each time */
2733   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2734   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2735   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2736   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2737   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2738   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2739 
2740   if (!B->preallocated) {
2741     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2742     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2743     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2744     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2745     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2746   }
2747 
2748   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2749   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2750   B->preallocated  = PETSC_TRUE;
2751   B->was_assembled = PETSC_FALSE;
2752   B->assembled     = PETSC_FALSE;
2753   PetscFunctionReturn(0);
2754 }
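
/*
  Example (a minimal user-level sketch, assuming global sizes 100 x 100 and the per-row
  estimates of 5 diagonal / 2 off-diagonal nonzeros as placeholders): the call sequence
  that ends up in MatMPIAIJSetPreallocation_MPIAIJ() above.

    Mat            A;
    PetscErrorCode ierr;

    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
    ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
    ierr = MatDestroy(&A);CHKERRQ(ierr);
*/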
2755 
2756 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2757 {
2758   Mat_MPIAIJ     *b;
2759   PetscErrorCode ierr;
2760 
2761   PetscFunctionBegin;
2762   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2763   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2765   b = (Mat_MPIAIJ*)B->data;
2766 
2767 #if defined(PETSC_USE_CTABLE)
2768   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2769 #else
2770   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2771 #endif
2772   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2773   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2774   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2775 
2776   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2777   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2778   B->preallocated  = PETSC_TRUE;
2779   B->was_assembled = PETSC_FALSE;
2780   B->assembled = PETSC_FALSE;
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2785 {
2786   Mat            mat;
2787   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   *newmat = 0;
2792   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2793   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2794   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2795   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2796   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2797   a       = (Mat_MPIAIJ*)mat->data;
2798 
2799   mat->factortype   = matin->factortype;
2800   mat->assembled    = PETSC_TRUE;
2801   mat->insertmode   = NOT_SET_VALUES;
2802   mat->preallocated = PETSC_TRUE;
2803 
2804   a->size         = oldmat->size;
2805   a->rank         = oldmat->rank;
2806   a->donotstash   = oldmat->donotstash;
2807   a->roworiented  = oldmat->roworiented;
2808   a->rowindices   = 0;
2809   a->rowvalues    = 0;
2810   a->getrowactive = PETSC_FALSE;
2811 
2812   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2813   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2814 
2815   if (oldmat->colmap) {
2816 #if defined(PETSC_USE_CTABLE)
2817     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2818 #else
2819     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2820     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2821     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2822 #endif
2823   } else a->colmap = 0;
2824   if (oldmat->garray) {
2825     PetscInt len;
2826     len  = oldmat->B->cmap->n;
2827     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2829     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2830   } else a->garray = 0;
2831 
2832   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2833   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2834   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2835   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2836 
2837   if (oldmat->Mvctx_mpi1) {
2838     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2839     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2840   }
2841 
2842   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2843   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2844   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2845   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2846   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2847   *newmat = mat;
2848   PetscFunctionReturn(0);
2849 }
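
/*
  Example (a minimal sketch, assuming A is an assembled MPIAIJ matrix): duplicating A,
  which dispatches to MatDuplicate_MPIAIJ() above. MAT_COPY_VALUES copies the numerical
  entries as well; MAT_DO_NOT_COPY_VALUES copies only the nonzero structure.

    Mat B;
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
    ...
    ierr = MatDestroy(&B);CHKERRQ(ierr);
*/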
2850 
2851 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2852 {
2853   PetscScalar    *vals,*svals;
2854   MPI_Comm       comm;
2855   PetscErrorCode ierr;
2856   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2857   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2858   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2859   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2860   PetscInt       cend,cstart,n,*rowners;
2861   int            fd;
2862   PetscInt       bs = newMat->rmap->bs;
2863 
2864   PetscFunctionBegin;
2865   /* force binary viewer to load .info file if it has not yet done so */
2866   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2867   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2870   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2871   if (!rank) {
2872     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2873     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2874     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2875   }
2876 
2877   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2878   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2879   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2880   if (bs < 0) bs = 1;
2881 
2882   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2883   M    = header[1]; N = header[2];
2884 
2885   /* If global sizes are set, check if they are consistent with that given in the file */
2886   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2887   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2888 
2889   /* determine ownership of all (block) rows */
2890   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2891   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2892   else m = newMat->rmap->n; /* Set by user */
2893 
2894   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2895   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2896 
2897   /* First process needs enough room for process with most rows */
2898   if (!rank) {
2899     mmax = rowners[1];
2900     for (i=2; i<=size; i++) {
2901       mmax = PetscMax(mmax, rowners[i]);
2902     }
2903   } else mmax = -1;             /* unused, but compilers complain */
2904 
2905   rowners[0] = 0;
2906   for (i=2; i<=size; i++) {
2907     rowners[i] += rowners[i-1];
2908   }
2909   rstart = rowners[rank];
2910   rend   = rowners[rank+1];
2911 
2912   /* distribute row lengths to all processors */
2913   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2914   if (!rank) {
2915     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2916     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2917     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2918     for (j=0; j<m; j++) {
2919       procsnz[0] += ourlens[j];
2920     }
2921     for (i=1; i<size; i++) {
2922       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2923       /* calculate the number of nonzeros on each processor */
2924       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2925         procsnz[i] += rowlengths[j];
2926       }
2927       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2928     }
2929     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2930   } else {
2931     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2932   }
2933 
2934   if (!rank) {
2935     /* determine max buffer needed and allocate it */
2936     maxnz = 0;
2937     for (i=0; i<size; i++) {
2938       maxnz = PetscMax(maxnz,procsnz[i]);
2939     }
2940     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2941 
2942     /* read in my part of the matrix column indices  */
2943     nz   = procsnz[0];
2944     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2945     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2946 
2947     /* read in everyone else's column indices and ship them off */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(cols);CHKERRQ(ierr);
2954   } else {
2955     /* determine buffer space needed for message */
2956     nz = 0;
2957     for (i=0; i<m; i++) {
2958       nz += ourlens[i];
2959     }
2960     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2961 
2962     /* receive message of column indices */
2963     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2964   }
2965 
2966   /* determine column ownership if matrix is not square */
2967   if (N != M) {
2968     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2969     else n = newMat->cmap->n;
2970     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2971     cstart = cend - n;
2972   } else {
2973     cstart = rstart;
2974     cend   = rend;
2975     n      = cend - cstart;
2976   }
2977 
2978   /* loop over local rows, determining number of off diagonal entries */
2979   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2980   jj   = 0;
2981   for (i=0; i<m; i++) {
2982     for (j=0; j<ourlens[i]; j++) {
2983       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2984       jj++;
2985     }
2986   }
2987 
2988   for (i=0; i<m; i++) {
2989     ourlens[i] -= offlens[i];
2990   }
2991   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2992 
2993   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2994 
2995   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2996 
2997   for (i=0; i<m; i++) {
2998     ourlens[i] += offlens[i];
2999   }
3000 
3001   if (!rank) {
3002     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3003 
3004     /* read in my part of the matrix numerical values  */
3005     nz   = procsnz[0];
3006     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3007 
3008     /* insert into matrix */
3009     jj      = rstart;
3010     smycols = mycols;
3011     svals   = vals;
3012     for (i=0; i<m; i++) {
3013       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3014       smycols += ourlens[i];
3015       svals   += ourlens[i];
3016       jj++;
3017     }
3018 
3019     /* read in other processors and ship out */
3020     for (i=1; i<size; i++) {
3021       nz   = procsnz[i];
3022       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3023       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3024     }
3025     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3026   } else {
3027     /* receive numeric values */
3028     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3029 
3030     /* receive message of values */
3031     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3032 
3033     /* insert into matrix */
3034     jj      = rstart;
3035     smycols = mycols;
3036     svals   = vals;
3037     for (i=0; i<m; i++) {
3038       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3039       smycols += ourlens[i];
3040       svals   += ourlens[i];
3041       jj++;
3042     }
3043   }
3044   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3045   ierr = PetscFree(vals);CHKERRQ(ierr);
3046   ierr = PetscFree(mycols);CHKERRQ(ierr);
3047   ierr = PetscFree(rowners);CHKERRQ(ierr);
3048   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3049   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
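
/*
  Example (a minimal sketch, assuming a PETSc binary file "matrix.dat" exists): loading
  an MPIAIJ matrix from a binary viewer, which ends up in MatLoad_MPIAIJ() above. The
  row layout block size can be influenced with -matload_block_size, parsed above.

    Mat         A;
    PetscViewer viewer;

    ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatLoad(A,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/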
3052 
3053 /* Not scalable because of ISAllGather() unless getting all columns. */
3054 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3055 {
3056   PetscErrorCode ierr;
3057   IS             iscol_local;
3058   PetscBool      isstride;
3059   PetscMPIInt    lisstride=0,gisstride;
3060 
3061   PetscFunctionBegin;
3062   /* check if we are grabbing all columns */
3063   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3064 
3065   if (isstride) {
3066     PetscInt  start,len,mstart,mlen;
3067     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3068     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3069     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3070     if (mstart == start && mlen-mstart == len) lisstride = 1;
3071   }
3072 
3073   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3074   if (gisstride) {
3075     PetscInt N;
3076     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3077     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3078     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3079     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3080   } else {
3081     PetscInt cbs;
3082     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3083     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3084     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3085   }
3086 
3087   *isseq = iscol_local;
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 /*
3092  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3093  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3094 
3095  Input Parameters:
3096    mat - matrix
3097    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3098            i.e., mat->rstart <= isrow[i] < mat->rend
3099    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3100            i.e., mat->cstart <= iscol[i] < mat->cend
3101  Output Parameter:
3102    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3103    iscol_o - sequential column index set for retrieving mat->B
3104    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3105  */
3106 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3107 {
3108   PetscErrorCode ierr;
3109   Vec            x,cmap;
3110   const PetscInt *is_idx;
3111   PetscScalar    *xarray,*cmaparray;
3112   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3113   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3114   Mat            B=a->B;
3115   Vec            lvec=a->lvec,lcmap;
3116   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3117   MPI_Comm       comm;
3118   VecScatter     Mvctx=a->Mvctx;
3119 
3120   PetscFunctionBegin;
3121   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3122   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3123 
3124   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3125   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3126   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3127   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3128   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3129 
3130   /* Get start indices */
3131   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3132   isstart -= ncols;
3133   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3134 
3135   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3136   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3137   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3138   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3139   for (i=0; i<ncols; i++) {
3140     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3141     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3142     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3143   }
3144   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3145   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3146   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3147 
3148   /* Get iscol_d */
3149   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3150   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3151   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3152 
3153   /* Get isrow_d */
3154   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3155   rstart = mat->rmap->rstart;
3156   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3157   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3158   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3159   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3160 
3161   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3162   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3163   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3164 
3165   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3166   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3167   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3168 
3169   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3170 
3171   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3172   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3173 
3174   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3175   /* off-process column indices */
3176   count = 0;
3177   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3178   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3179 
3180   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3181   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3182   for (i=0; i<Bn; i++) {
3183     if (PetscRealPart(xarray[i]) > -1.0) {
3184       idx[count]     = i;                   /* local column index in off-diagonal part B */
3185       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3186       count++;
3187     }
3188   }
3189   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3190   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3191 
3192   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3193   /* cannot ensure iscol_o has same blocksize as iscol! */
3194 
3195   ierr = PetscFree(idx);CHKERRQ(ierr);
3196   *garray = cmap1;
3197 
3198   ierr = VecDestroy(&x);CHKERRQ(ierr);
3199   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3200   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3205 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3206 {
3207   PetscErrorCode ierr;
3208   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3209   Mat            M = NULL;
3210   MPI_Comm       comm;
3211   IS             iscol_d,isrow_d,iscol_o;
3212   Mat            Asub = NULL,Bsub = NULL;
3213   PetscInt       n;
3214 
3215   PetscFunctionBegin;
3216   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3217 
3218   if (call == MAT_REUSE_MATRIX) {
3219     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3220     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3221     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3222 
3223     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3224     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3225 
3226     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3227     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3228 
3229     /* Update diagonal and off-diagonal portions of submat */
3230     asub = (Mat_MPIAIJ*)(*submat)->data;
3231     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3232     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3233     if (n) {
3234       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3235     }
3236     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3237     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238 
3239   } else { /* call == MAT_INITIAL_MATRIX */
3240     const PetscInt *garray;
3241     PetscInt        BsubN;
3242 
3243     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3244     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3245 
3246     /* Create local submatrices Asub and Bsub */
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3249 
3250     /* Create submatrix M */
3251     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3252 
3253     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3254     asub = (Mat_MPIAIJ*)M->data;
3255 
3256     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3257     n = asub->B->cmap->N;
3258     if (BsubN > n) {
3259       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3260       const PetscInt *idx;
3261       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3262       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3263 
3264       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3265       j = 0;
3266       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3267       for (i=0; i<n; i++) {
3268         if (j >= BsubN) break;
3269         while (subgarray[i] > garray[j]) j++;
3270 
3271         if (subgarray[i] == garray[j]) {
3272           idx_new[i] = idx[j++];
3273         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3274       }
3275       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3276 
3277       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3278       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3279 
3280     } else if (BsubN < n) {
3281       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3282     }
3283 
3284     ierr = PetscFree(garray);CHKERRQ(ierr);
3285     *submat = M;
3286 
3287     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3288     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3289     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3290 
3291     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3292     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3293 
3294     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3295     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3296   }
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3301 {
3302   PetscErrorCode ierr;
3303   IS             iscol_local=NULL,isrow_d;
3304   PetscInt       csize;
3305   PetscInt       n,i,j,start,end;
3306   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3307   MPI_Comm       comm;
3308 
3309   PetscFunctionBegin;
3310   /* If isrow has same processor distribution as mat,
3311      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3312   if (call == MAT_REUSE_MATRIX) {
3313     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3314     if (isrow_d) {
3315       sameRowDist  = PETSC_TRUE;
3316       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3317     } else {
3318       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3319       if (iscol_local) {
3320         sameRowDist  = PETSC_TRUE;
3321         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3322       }
3323     }
3324   } else {
3325     /* Check if isrow has same processor distribution as mat */
3326     sameDist[0] = PETSC_FALSE;
3327     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3328     if (!n) {
3329       sameDist[0] = PETSC_TRUE;
3330     } else {
3331       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3332       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3333       if (i >= start && j < end) {
3334         sameDist[0] = PETSC_TRUE;
3335       }
3336     }
3337 
3338     /* Check if iscol has same processor distribution as mat */
3339     sameDist[1] = PETSC_FALSE;
3340     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3341     if (!n) {
3342       sameDist[1] = PETSC_TRUE;
3343     } else {
3344       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3345       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3346       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3347     }
3348 
3349     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3350     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3351     sameRowDist = tsameDist[0];
3352   }
3353 
3354   if (sameRowDist) {
3355     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3356       /* isrow and iscol have same processor distribution as mat */
3357       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3358       PetscFunctionReturn(0);
3359     } else { /* sameRowDist */
3360       /* isrow has same processor distribution as mat */
3361       if (call == MAT_INITIAL_MATRIX) {
3362         PetscBool sorted;
3363         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3364         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3365         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3366         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3367 
3368         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3369         if (sorted) {
3370           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3371           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3372           PetscFunctionReturn(0);
3373         }
3374       } else { /* call == MAT_REUSE_MATRIX */
3375         IS    iscol_sub;
3376         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3377         if (iscol_sub) {
3378           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3379           PetscFunctionReturn(0);
3380         }
3381       }
3382     }
3383   }
3384 
3385   /* General case: iscol -> iscol_local which has global size of iscol */
3386   if (call == MAT_REUSE_MATRIX) {
3387     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3388     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3389   } else {
3390     if (!iscol_local) {
3391       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3392     }
3393   }
3394 
3395   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3396   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3397 
3398   if (call == MAT_INITIAL_MATRIX) {
3399     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3400     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3401   }
3402   PetscFunctionReturn(0);
3403 }
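
/*
  Example (a minimal sketch, assuming mat, isrow and iscol already exist on the same
  communicator): extracting a parallel submatrix and later refreshing it after the
  values of mat change, which exercises the MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX
  branches of MatCreateSubMatrix_MPIAIJ() above.

    Mat sub;
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
    ... change the numerical values of mat, keeping the same nonzero pattern ...
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
    ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/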
3404 
3405 /*@C
3406      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3407          and "off-diagonal" part of the matrix in CSR format.
3408 
3409    Collective on MPI_Comm
3410 
3411    Input Parameters:
3412 +  comm - MPI communicator
3413 .  A - "diagonal" portion of matrix
3414 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3415 -  garray - global index of B columns
3416 
3417    Output Parameter:
3418 .   mat - the matrix, with input A as its local diagonal matrix
3419    Level: advanced
3420 
3421    Notes:
3422        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3423        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3424 
3425 .seealso: MatCreateMPIAIJWithSplitArrays()
3426 @*/
3427 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3428 {
3429   PetscErrorCode ierr;
3430   Mat_MPIAIJ     *maij;
3431   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3432   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3433   PetscScalar    *oa=b->a;
3434   Mat            Bnew;
3435   PetscInt       m,n,N;
3436 
3437   PetscFunctionBegin;
3438   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3439   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3440   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3441   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3442   /* removed the check below; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3443   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3444 
3445   /* Get global columns of mat */
3446   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3447 
3448   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3449   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3450   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3451   maij = (Mat_MPIAIJ*)(*mat)->data;
3452 
3453   (*mat)->preallocated = PETSC_TRUE;
3454 
3455   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3456   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3457 
3458   /* Set A as diagonal portion of *mat */
3459   maij->A = A;
3460 
3461   nz = oi[m];
3462   for (i=0; i<nz; i++) {
3463     col   = oj[i];
3464     oj[i] = garray[col];
3465   }
3466 
3467    /* Set Bnew as off-diagonal portion of *mat */
3468   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3469   bnew        = (Mat_SeqAIJ*)Bnew->data;
3470   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3471   maij->B     = Bnew;
3472 
3473   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3474 
3475   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3476   b->free_a       = PETSC_FALSE;
3477   b->free_ij      = PETSC_FALSE;
3478   ierr = MatDestroy(&B);CHKERRQ(ierr);
3479 
3480   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3481   bnew->free_a       = PETSC_TRUE;
3482   bnew->free_ij      = PETSC_TRUE;
3483 
3484   /* condense columns of maij->B */
3485   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3486   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3487   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3488   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3489   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3490   PetscFunctionReturn(0);
3491 }
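
/*
  Example (a minimal sketch; building the blocks is not shown): assembling an MPIAIJ
  matrix from already constructed sequential blocks with MatCreateMPIAIJWithSeqAIJ()
  above. Ad is the m x n "diagonal" SeqAIJ block, Ao the m x k "off-diagonal" SeqAIJ
  block, and garray lists, in increasing order, the k global column indices of Ao's
  local columns.

    Mat      Ad,Ao,C;
    PetscInt *garray;

    ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);
    ... Ad is now owned by C and Ao has been destroyed; neither may be used afterwards ...
*/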
3492 
3493 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3494 
3495 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3496 {
3497   PetscErrorCode ierr;
3498   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3499   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3500   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3501   Mat            M,Msub,B=a->B;
3502   MatScalar      *aa;
3503   Mat_SeqAIJ     *aij;
3504   PetscInt       *garray = a->garray,*colsub,Ncols;
3505   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3506   IS             iscol_sub,iscmap;
3507   const PetscInt *is_idx,*cmap;
3508   PetscBool      allcolumns=PETSC_FALSE;
3509   MPI_Comm       comm;
3510 
3511   PetscFunctionBegin;
3512   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3513 
3514   if (call == MAT_REUSE_MATRIX) {
3515     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3516     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3517     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3518 
3519     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3520     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3521 
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3523     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3524 
3525     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3526 
3527   } else { /* call == MAT_INITIAL_MATRIX */
3528     PetscBool flg;
3529 
3530     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3531     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3532 
3533     /* (1) iscol -> nonscalable iscol_local */
3534     /* Check for special case: each processor gets entire matrix columns */
3535     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3536     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3537     if (allcolumns) {
3538       iscol_sub = iscol_local;
3539       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3540       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3541 
3542     } else {
3543       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3544       PetscInt *idx,*cmap1,k;
3545       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3546       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3547       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3548       count = 0;
3549       k     = 0;
3550       for (i=0; i<Ncols; i++) {
3551         j = is_idx[i];
3552         if (j >= cstart && j < cend) {
3553           /* diagonal part of mat */
3554           idx[count]     = j;
3555           cmap1[count++] = i; /* column index in submat */
3556         } else if (Bn) {
3557           /* off-diagonal part of mat */
3558           if (j == garray[k]) {
3559             idx[count]     = j;
3560             cmap1[count++] = i;  /* column index in submat */
3561           } else if (j > garray[k]) {
3562             while (j > garray[k] && k < Bn-1) k++;
3563             if (j == garray[k]) {
3564               idx[count]     = j;
3565               cmap1[count++] = i; /* column index in submat */
3566             }
3567           }
3568         }
3569       }
3570       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3571 
3572       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3573       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3574       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3575 
3576       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3577     }
3578 
3579     /* (3) Create sequential Msub */
3580     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3581   }
3582 
3583   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3584   aij  = (Mat_SeqAIJ*)(Msub)->data;
3585   ii   = aij->i;
3586   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3587 
3588   /*
3589       m - number of local rows
3590       Ncols - number of columns (same on all processors)
3591       rstart - first row in new global matrix generated
3592   */
3593   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3594 
3595   if (call == MAT_INITIAL_MATRIX) {
3596     /* (4) Create parallel newmat */
3597     PetscMPIInt    rank,size;
3598     PetscInt       csize;
3599 
3600     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3601     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3602 
3603     /*
3604         Determine the number of non-zeros in the diagonal and off-diagonal
3605         portions of the matrix in order to do correct preallocation
3606     */
3607 
3608     /* first get start and end of "diagonal" columns */
3609     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3610     if (csize == PETSC_DECIDE) {
3611       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3612       if (mglobal == Ncols) { /* square matrix */
3613         nlocal = m;
3614       } else {
3615         nlocal = Ncols/size + ((Ncols % size) > rank);
3616       }
3617     } else {
3618       nlocal = csize;
3619     }
3620     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3621     rstart = rend - nlocal;
3622     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3623 
3624     /* next, compute all the lengths */
3625     jj    = aij->j;
3626     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3627     olens = dlens + m;
3628     for (i=0; i<m; i++) {
3629       jend = ii[i+1] - ii[i];
3630       olen = 0;
3631       dlen = 0;
3632       for (j=0; j<jend; j++) {
3633         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3634         else dlen++;
3635         jj++;
3636       }
3637       olens[i] = olen;
3638       dlens[i] = dlen;
3639     }
3640 
3641     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3642     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3643 
3644     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3645     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3646     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3647     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3648     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3649     ierr = PetscFree(dlens);CHKERRQ(ierr);
3650 
3651   } else { /* call == MAT_REUSE_MATRIX */
3652     M    = *newmat;
3653     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3654     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3655     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3656     /*
3657          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3658        rather than the slower MatSetValues().
3659     */
3660     M->was_assembled = PETSC_TRUE;
3661     M->assembled     = PETSC_FALSE;
3662   }
3663 
3664   /* (5) Set values of Msub to *newmat */
3665   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3666   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3667 
3668   jj   = aij->j;
3669   aa   = aij->a;
3670   for (i=0; i<m; i++) {
3671     row = rstart + i;
3672     nz  = ii[i+1] - ii[i];
3673     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3674     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3675     jj += nz; aa += nz;
3676   }
3677   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3678 
3679   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3680   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3681 
3682   ierr = PetscFree(colsub);CHKERRQ(ierr);
3683 
3684   /* save Msub, iscol_sub and iscmap used in processor for next request */
3685   if (call ==  MAT_INITIAL_MATRIX) {
3686     *newmat = M;
3687     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3688     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3689 
3690     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3691     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3692 
3693     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3694     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3695 
3696     if (iscol_local) {
3697       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3698       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3699     }
3700   }
3701   PetscFunctionReturn(0);
3702 }
3703 
3704 /*
3705     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
3706   submatrix on each process, and then the end result by concatenating the local matrices.
3707   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3708 
3709   Note: This requires a sequential iscol with all indices.
3710 */
3711 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3712 {
3713   PetscErrorCode ierr;
3714   PetscMPIInt    rank,size;
3715   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3716   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3717   Mat            M,Mreuse;
3718   MatScalar      *aa,*vwork;
3719   MPI_Comm       comm;
3720   Mat_SeqAIJ     *aij;
3721   PetscBool      colflag,allcolumns=PETSC_FALSE;
3722 
3723   PetscFunctionBegin;
3724   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3725   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3726   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3727 
3728   /* Check for special case: each processor gets entire matrix columns */
3729   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3730   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3731   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3732 
3733   if (call ==  MAT_REUSE_MATRIX) {
3734     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3735     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3736     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3737   } else {
3738     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3739   }
3740 
3741   /*
3742       m - number of local rows
3743       n - number of columns (same on all processors)
3744       rstart - first row in new global matrix generated
3745   */
3746   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3747   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3748   if (call == MAT_INITIAL_MATRIX) {
3749     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3750     ii  = aij->i;
3751     jj  = aij->j;
3752 
3753     /*
3754         Determine the number of non-zeros in the diagonal and off-diagonal
3755         portions of the matrix in order to do correct preallocation
3756     */
3757 
3758     /* first get start and end of "diagonal" columns */
3759     if (csize == PETSC_DECIDE) {
3760       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3761       if (mglobal == n) { /* square matrix */
3762         nlocal = m;
3763       } else {
3764         nlocal = n/size + ((n % size) > rank);
3765       }
3766     } else {
3767       nlocal = csize;
3768     }
3769     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3770     rstart = rend - nlocal;
3771     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3772 
3773     /* next, compute all the lengths */
3774     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3775     olens = dlens + m;
3776     for (i=0; i<m; i++) {
3777       jend = ii[i+1] - ii[i];
3778       olen = 0;
3779       dlen = 0;
3780       for (j=0; j<jend; j++) {
3781         if (*jj < rstart || *jj >= rend) olen++;
3782         else dlen++;
3783         jj++;
3784       }
3785       olens[i] = olen;
3786       dlens[i] = dlen;
3787     }
3788     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3789     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3790     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3791     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3792     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3793     ierr = PetscFree(dlens);CHKERRQ(ierr);
3794   } else {
3795     PetscInt ml,nl;
3796 
3797     M    = *newmat;
3798     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3799     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3800     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3801     /*
3802          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3803        rather than the slower MatSetValues().
3804     */
3805     M->was_assembled = PETSC_TRUE;
3806     M->assembled     = PETSC_FALSE;
3807   }
3808   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3809   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3810   ii   = aij->i;
3811   jj   = aij->j;
3812   aa   = aij->a;
3813   for (i=0; i<m; i++) {
3814     row   = rstart + i;
3815     nz    = ii[i+1] - ii[i];
3816     cwork = jj;     jj += nz;
3817     vwork = aa;     aa += nz;
3818     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3819   }
3820 
3821   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3823   *newmat = M;
3824 
3825   /* save submatrix used in processor for next request */
3826   if (call ==  MAT_INITIAL_MATRIX) {
3827     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3828     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3829   }
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3834 {
3835   PetscInt       m,cstart, cend,j,nnz,i,d;
3836   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3837   const PetscInt *JJ;
3838   PetscScalar    *values;
3839   PetscErrorCode ierr;
3840   PetscBool      nooffprocentries;
3841 
3842   PetscFunctionBegin;
3843   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3844 
3845   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3846   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3847   m      = B->rmap->n;
3848   cstart = B->cmap->rstart;
3849   cend   = B->cmap->rend;
3850   rstart = B->rmap->rstart;
3851 
3852   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3853 
3854 #if defined(PETSC_USE_DEBUG)
3855   for (i=0; i<m; i++) {
3856     nnz = Ii[i+1]- Ii[i];
3857     JJ  = J + Ii[i];
3858     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3859     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3860     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3861   }
3862 #endif
3863 
3864   for (i=0; i<m; i++) {
3865     nnz     = Ii[i+1]- Ii[i];
3866     JJ      = J + Ii[i];
3867     nnz_max = PetscMax(nnz_max,nnz);
3868     d       = 0;
3869     for (j=0; j<nnz; j++) {
3870       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3871     }
3872     d_nnz[i] = d;
3873     o_nnz[i] = nnz - d;
3874   }
3875   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3876   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3877 
3878   if (v) values = (PetscScalar*)v;
3879   else {
3880     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3881   }
3882 
3883   for (i=0; i<m; i++) {
3884     ii   = i + rstart;
3885     nnz  = Ii[i+1]- Ii[i];
3886     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3887   }
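  /* every row above was inserted with this rank's own (owned) row indices, so assembly will generate no off-process entries; record that so the assembly below can skip the stash exchange */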
3888   nooffprocentries    = B->nooffprocentries;
3889   B->nooffprocentries = PETSC_TRUE;
3890   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   B->nooffprocentries = nooffprocentries;
3893 
3894   if (!v) {
3895     ierr = PetscFree(values);CHKERRQ(ierr);
3896   }
3897   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 /*@
3902    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3903    (the default parallel PETSc format).
3904 
3905    Collective on MPI_Comm
3906 
3907    Input Parameters:
3908 +  B - the matrix
3909 .  i - the indices into j for the start of each local row (starts with zero)
3910 .  j - the column indices for each local row (starts with zero)
3911 -  v - optional values in the matrix
3912 
3913    Level: developer
3914 
3915    Notes:
3916        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3917      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3918      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3919 
3920        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3921 
3922        The format which is used for the sparse matrix input, is equivalent to a
3923     row-major ordering, i.e., for the following matrix, the input data expected is
3924     as shown
3925 
3926 $        1 0 0
3927 $        2 0 3     P0
3928 $       -------
3929 $        4 5 6     P1
3930 $
3931 $     Process0 [P0]: rows_owned=[0,1]
3932 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3933 $        j =  {0,0,2}  [size = 3]
3934 $        v =  {1,2,3}  [size = 3]
3935 $
3936 $     Process1 [P1]: rows_owned=[2]
3937 $        i =  {0,3}    [size = nrow+1  = 1+1]
3938 $        j =  {0,1,2}  [size = 3]
3939 $        v =  {4,5,6}  [size = 3]
3940 
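       A minimal calling sequence for process 0 of the layout above might look like the
     following sketch (error checking omitted; comm and the variable names are assumed,
     and process 1 would pass its own i, j, v shown above):

$     Mat         B;
$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$
$     MatCreate(comm,&B);
$     MatSetSizes(B,2,PETSC_DECIDE,3,3);      /* 2 local rows of the 3x3 example */
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
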
3941 .keywords: matrix, aij, compressed row, sparse, parallel
3942 
3943 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3944           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3945 @*/
3946 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3947 {
3948   PetscErrorCode ierr;
3949 
3950   PetscFunctionBegin;
3951   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3952   PetscFunctionReturn(0);
3953 }
3954 
3955 /*@C
3956    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3957    (the default parallel PETSc format).  For good matrix assembly performance
3958    the user should preallocate the matrix storage by setting the parameters
3959    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3960    performance can be increased by more than a factor of 50.
3961 
3962    Collective on MPI_Comm
3963 
3964    Input Parameters:
3965 +  B - the matrix
3966 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3967            (same value is used for all local rows)
3968 .  d_nnz - array containing the number of nonzeros in the various rows of the
3969            DIAGONAL portion of the local submatrix (possibly different for each row)
3970            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3971            The size of this array is equal to the number of local rows, i.e 'm'.
3972            For matrices that will be factored, you must leave room for (and set)
3973            the diagonal entry even if it is zero.
3974 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3975            submatrix (same value is used for all local rows).
3976 -  o_nnz - array containing the number of nonzeros in the various rows of the
3977            OFF-DIAGONAL portion of the local submatrix (possibly different for
3978            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3979            structure. The size of this array is equal to the number
3980            of local rows, i.e 'm'.
3981 
3982    If the *_nnz parameter is given then the *_nz parameter is ignored
3983 
3984    The AIJ format (also called the Yale sparse matrix format or
3985    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3986    storage.  The stored row and column indices begin with zero.
3987    See Users-Manual: ch_mat for details.
3988 
3989    The parallel matrix is partitioned such that the first m0 rows belong to
3990    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3991    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3992 
3993    The DIAGONAL portion of the local submatrix of a processor can be defined
3994    as the submatrix which is obtained by extracting the part corresponding to
3995    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3996    first row that belongs to the processor, r2 is the last row belonging to
3997    this processor, and c1-c2 is the range of indices of the local part of a
3998    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3999    common case of a square matrix, the row and column ranges are the same and
4000    the DIAGONAL part is also square. The remaining portion of the local
4001    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4002 
4003    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4004 
4005    You can call MatGetInfo() to get information on how effective the preallocation was;
4006    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4007    You can also run with the option -info and look for messages with the string
4008    malloc in them to see if additional memory allocation was needed.
4009 
4010    Example usage:
4011 
4012    Consider the following 8x8 matrix with 34 non-zero values, that is
4013    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4014    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4015    as follows:
4016 
4017 .vb
4018             1  2  0  |  0  3  0  |  0  4
4019     Proc0   0  5  6  |  7  0  0  |  8  0
4020             9  0 10  | 11  0  0  | 12  0
4021     -------------------------------------
4022            13  0 14  | 15 16 17  |  0  0
4023     Proc1   0 18  0  | 19 20 21  |  0  0
4024             0  0  0  | 22 23  0  | 24  0
4025     -------------------------------------
4026     Proc2  25 26 27  |  0  0 28  | 29  0
4027            30  0  0  | 31 32 33  |  0 34
4028 .ve
4029 
4030    This can be represented as a collection of submatrices as:
4031 
4032 .vb
4033       A B C
4034       D E F
4035       G H I
4036 .ve
4037 
4038    Where the submatrices A,B,C are owned by proc0, D,E,F are
4039    owned by proc1, G,H,I are owned by proc2.
4040 
4041    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4042    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4043    The 'M','N' parameters are 8,8, and have the same values on all procs.
4044 
4045    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4046    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4047    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4048    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4049    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4050    matrix, and [DF] as another SeqAIJ matrix.
4051 
4052    When d_nz, o_nz parameters are specified, d_nz storage elements are
4053    allocated for every row of the local diagonal submatrix, and o_nz
4054    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4055    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4056    local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4057    In this case, the values of d_nz,o_nz are:
4058 .vb
4059      proc0 : d_nz = 2, o_nz = 2
4060      proc1 : d_nz = 3, o_nz = 2
4061      proc2 : d_nz = 1, o_nz = 4
4062 .ve
4063    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4064    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4065    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4066    34 values.
4067 
4068    When d_nnz, o_nnz parameters are specified, the storage is specified
4069    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4070    In the above case the values for d_nnz,o_nnz are:
4071 .vb
4072      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4073      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4074      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4075 .ve
4076    Here the space allocated is the sum of all the above values, i.e., 34, and
4077    hence the preallocation is exact.
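   As a sketch only (comm is assumed and error checking is omitted), proc0 of the
   example above could preallocate its rows with:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve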
4078 
4079    Level: intermediate
4080 
4081 .keywords: matrix, aij, compressed row, sparse, parallel
4082 
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4084           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4085 @*/
4086 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4087 {
4088   PetscErrorCode ierr;
4089 
4090   PetscFunctionBegin;
4091   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4092   PetscValidType(B,1);
4093   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4094   PetscFunctionReturn(0);
4095 }
4096 
4097 /*@
4098      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4099          CSR format the local rows.
4100 
4101    Collective on MPI_Comm
4102 
4103    Input Parameters:
4104 +  comm - MPI communicator
4105 .  m - number of local rows (Cannot be PETSC_DECIDE)
4106 .  n - This value should be the same as the local size used in creating the
4107        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4108        calculated if N is given). For square matrices n is almost always m.
4109 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4110 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4111 .   i - row indices
4112 .   j - column indices
4113 -   a - matrix values
4114 
4115    Output Parameter:
4116 .   mat - the matrix
4117 
4118    Level: intermediate
4119 
4120    Notes:
4121        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4122      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4123      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4124 
4125        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4126 
4127        The format which is used for the sparse matrix input, is equivalent to a
4128     row-major ordering, i.e., for the following matrix, the input data expected is
4129     as shown
4130 
4131 $        1 0 0
4132 $        2 0 3     P0
4133 $       -------
4134 $        4 5 6     P1
4135 $
4136 $     Process0 [P0]: rows_owned=[0,1]
4137 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4138 $        j =  {0,0,2}  [size = 3]
4139 $        v =  {1,2,3}  [size = 3]
4140 $
4141 $     Process1 [P1]: rows_owned=[2]
4142 $        i =  {0,3}    [size = nrow+1  = 1+1]
4143 $        j =  {0,1,2}  [size = 3]
4144 $        v =  {4,5,6}  [size = 3]
4145 
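       As an illustrative sketch only (error checking omitted; comm is assumed, and
     process 1 would pass its own i, j, v shown above), process 0 could create the
     matrix of the example with:

$     Mat         A;
$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
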
4146 .keywords: matrix, aij, compressed row, sparse, parallel
4147 
4148 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4149           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4150 @*/
4151 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4152 {
4153   PetscErrorCode ierr;
4154 
4155   PetscFunctionBegin;
4156   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4157   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4158   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4159   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4160   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4161   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4162   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4163   PetscFunctionReturn(0);
4164 }
4165 
4166 /*@C
4167    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4168    (the default parallel PETSc format).  For good matrix assembly performance
4169    the user should preallocate the matrix storage by setting the parameters
4170    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4171    performance can be increased by more than a factor of 50.
4172 
4173    Collective on MPI_Comm
4174 
4175    Input Parameters:
4176 +  comm - MPI communicator
4177 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4178            This value should be the same as the local size used in creating the
4179            y vector for the matrix-vector product y = Ax.
4180 .  n - This value should be the same as the local size used in creating the
4181        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4182        calculated if N is given). For square matrices n is almost always m.
4183 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4184 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4185 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4186            (same value is used for all local rows)
4187 .  d_nnz - array containing the number of nonzeros in the various rows of the
4188            DIAGONAL portion of the local submatrix (possibly different for each row)
4189            or NULL, if d_nz is used to specify the nonzero structure.
4190            The size of this array is equal to the number of local rows, i.e 'm'.
4191 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4192            submatrix (same value is used for all local rows).
4193 -  o_nnz - array containing the number of nonzeros in the various rows of the
4194            OFF-DIAGONAL portion of the local submatrix (possibly different for
4195            each row) or NULL, if o_nz is used to specify the nonzero
4196            structure. The size of this array is equal to the number
4197            of local rows, i.e 'm'.
4198 
4199    Output Parameter:
4200 .  A - the matrix
4201 
4202    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4203    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4204    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4205 
4206    Notes:
4207    If the *_nnz parameter is given then the *_nz parameter is ignored
4208 
4209    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4210    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4211    storage requirements for this matrix.
4212 
4213    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4214    processor then it must be used on all processors that share the object for
4215    that argument.
4216 
4217    The user MUST specify either the local or global matrix dimensions
4218    (possibly both).
4219 
4220    The parallel matrix is partitioned across processors such that the
4221    first m0 rows belong to process 0, the next m1 rows belong to
4222    process 1, the next m2 rows belong to process 2, etc., where
4223    m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores
4224    values corresponding to an [m x N] submatrix.
4225 
4226    The columns are logically partitioned with the n0 columns belonging
4227    to the 0th partition, the next n1 columns belonging to the next
4228    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4229 
4230    The DIAGONAL portion of the local submatrix on any given processor
4231    is the submatrix corresponding to the rows and columns m,n owned by
4232    the given processor, i.e., the diagonal matrix on
4233    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4234    etc. The remaining portion of the local submatrix [m x (N-n)]
4235    constitutes the OFF-DIAGONAL portion. The example below better
4236    illustrates this concept.
4237 
4238    For a square global matrix we define each processor's diagonal portion
4239    to be its local rows and the corresponding columns (a square submatrix);
4240    each processor's off-diagonal portion encompasses the remainder of the
4241    local matrix (a rectangular submatrix).
4242 
4243    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4244 
4245    When calling this routine with a single process communicator, a matrix of
4246    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4247    type of communicator, use the construction mechanism
4248 .vb
4249      MatCreate(...,&A);
4250      MatSetType(A,MATMPIAIJ);
4251      MatSetSizes(A, m,n,M,N);
4252      MatMPIAIJSetPreallocation(A,...);
4253 .ve
4256 
4257    By default, this format uses inodes (identical nodes) when possible.
4258    We search for consecutive rows with the same nonzero structure, thereby
4259    reusing matrix information to achieve increased efficiency.
4260 
4261    Options Database Keys:
4262 +  -mat_no_inode  - Do not use inodes
4263 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4264 
4265 
4266 
4267    Example usage:
4268 
4269    Consider the following 8x8 matrix with 34 non-zero values, that is
4270    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4271    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4272    as follows
4273 
4274 .vb
4275             1  2  0  |  0  3  0  |  0  4
4276     Proc0   0  5  6  |  7  0  0  |  8  0
4277             9  0 10  | 11  0  0  | 12  0
4278     -------------------------------------
4279            13  0 14  | 15 16 17  |  0  0
4280     Proc1   0 18  0  | 19 20 21  |  0  0
4281             0  0  0  | 22 23  0  | 24  0
4282     -------------------------------------
4283     Proc2  25 26 27  |  0  0 28  | 29  0
4284            30  0  0  | 31 32 33  |  0 34
4285 .ve
4286 
4287    This can be represented as a collection of submatrices as
4288 
4289 .vb
4290       A B C
4291       D E F
4292       G H I
4293 .ve
4294 
4295    Where the submatrices A,B,C are owned by proc0, D,E,F are
4296    owned by proc1, G,H,I are owned by proc2.
4297 
4298    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4299    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4300    The 'M','N' parameters are 8,8, and have the same values on all procs.
4301 
4302    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4303    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4304    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4305    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4306    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4307    matrix, and [DF] as another SeqAIJ matrix.
4308 
4309    When d_nz, o_nz parameters are specified, d_nz storage elements are
4310    allocated for every row of the local diagonal submatrix, and o_nz
4311    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4312    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4313    local row in the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4314    In this case, the values of d_nz,o_nz are
4315 .vb
4316      proc0 : d_nz = 2, o_nz = 2
4317      proc1 : d_nz = 3, o_nz = 2
4318      proc2 : d_nz = 1, o_nz = 4
4319 .ve
4320    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4321    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4322    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4323    34 values.
4324 
4325    When d_nnz, o_nnz parameters are specified, the storage is specified
4326    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4327    In the above case the values for d_nnz,o_nnz are
4328 .vb
4329      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4330      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4331      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4332 .ve
4333    Here the space allocated is the sum of all the above values, i.e., 34, and
4334    hence the preallocation is exact.
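   As a sketch only (comm is assumed and error checking is omitted), proc0 of the
   example above could create its part of the matrix with either variant:

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);   /* per-row counts */
     /* or, with single per-row estimates: MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A); */
.ve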
4335 
4336    Level: intermediate
4337 
4338 .keywords: matrix, aij, compressed row, sparse, parallel
4339 
4340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4341           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4342 @*/
4343 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4344 {
4345   PetscErrorCode ierr;
4346   PetscMPIInt    size;
4347 
4348   PetscFunctionBegin;
4349   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4350   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4351   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4352   if (size > 1) {
4353     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4354     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4355   } else {
4356     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4357     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4358   }
4359   PetscFunctionReturn(0);
4360 }
4361 
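/*
   MatMPIAIJGetSeqAIJ - returns the two SeqAIJ blocks that a MATMPIAIJ matrix stores internally:
   Ad is the diagonal block, Ao the off-diagonal block, and colmap maps the (compressed) local
   column indices of Ao to global column numbers. Any output argument may be NULL. The blocks
   are the internal matrices, not copies, so they should not be destroyed by the caller.
*/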
4362 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4363 {
4364   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4365   PetscBool      flg;
4366   PetscErrorCode ierr;
4367 
4368   PetscFunctionBegin;
4369   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4370   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4371   if (Ad)     *Ad     = a->A;
4372   if (Ao)     *Ao     = a->B;
4373   if (colmap) *colmap = a->garray;
4374   PetscFunctionReturn(0);
4375 }
4376 
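/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the sequential matrix inmat supplied by each
   rank, row-wise, into a single parallel AIJ matrix on comm. n is the local column size of the
   result (or PETSC_DECIDE); scall selects MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX.
*/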
4377 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4378 {
4379   PetscErrorCode ierr;
4380   PetscInt       m,N,i,rstart,nnz,Ii;
4381   PetscInt       *indx;
4382   PetscScalar    *values;
4383 
4384   PetscFunctionBegin;
4385   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4386   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4387     PetscInt       *dnz,*onz,sum,bs,cbs;
4388 
4389     if (n == PETSC_DECIDE) {
4390       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4391     }
4392     /* Check sum(n) = N */
4393     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4394     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4395 
4396     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4397     rstart -= m;
4398 
4399     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4400     for (i=0; i<m; i++) {
4401       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4402       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4403       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4404     }
4405 
4406     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4407     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4408     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4409     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4410     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4411     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4412     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4413     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4414   }
4415 
4416   /* numeric phase */
4417   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4418   for (i=0; i<m; i++) {
4419     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4420     Ii   = i + rstart;
4421     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4422     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4423   }
4424   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4425   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4426   PetscFunctionReturn(0);
4427 }
4428 
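/*
   MatFileSplit - writes each rank's local rows of A, as a sequential matrix, to its own binary
   file named <outfile>.<rank>.
*/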
4429 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4430 {
4431   PetscErrorCode    ierr;
4432   PetscMPIInt       rank;
4433   PetscInt          m,N,i,rstart,nnz;
4434   size_t            len;
4435   const PetscInt    *indx;
4436   PetscViewer       out;
4437   char              *name;
4438   Mat               B;
4439   const PetscScalar *values;
4440 
4441   PetscFunctionBegin;
4442   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4443   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4444   /* Should this be the type of the diagonal block of A? */
4445   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4446   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4447   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4448   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4449   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4450   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4451   for (i=0; i<m; i++) {
4452     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4453     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4454     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4455   }
4456   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4458 
4459   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4460   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4461   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4462   sprintf(name,"%s.%d",outfile,rank);
4463   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4464   ierr = PetscFree(name);CHKERRQ(ierr);
4465   ierr = MatView(B,out);CHKERRQ(ierr);
4466   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4467   ierr = MatDestroy(&B);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4472 {
4473   PetscErrorCode      ierr;
4474   Mat_Merge_SeqsToMPI *merge;
4475   PetscContainer      container;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4479   if (container) {
4480     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4493     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4494     ierr = PetscFree(merge);CHKERRQ(ierr);
4495     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4496   }
4497   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4498   PetscFunctionReturn(0);
4499 }
4500 
4501 #include <../src/mat/utils/freespace.h>
4502 #include <petscbt.h>
4503 
4504 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4505 {
4506   PetscErrorCode      ierr;
4507   MPI_Comm            comm;
4508   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4509   PetscMPIInt         size,rank,taga,*len_s;
4510   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4511   PetscInt            proc,m;
4512   PetscInt            **buf_ri,**buf_rj;
4513   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4514   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4515   MPI_Request         *s_waits,*r_waits;
4516   MPI_Status          *status;
4517   MatScalar           *aa=a->a;
4518   MatScalar           **abuf_r,*ba_i;
4519   Mat_Merge_SeqsToMPI *merge;
4520   PetscContainer      container;
4521 
4522   PetscFunctionBegin;
4523   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4524   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4525 
4526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4528 
4529   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4530   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4531 
4532   bi     = merge->bi;
4533   bj     = merge->bj;
4534   buf_ri = merge->buf_ri;
4535   buf_rj = merge->buf_rj;
4536 
4537   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4538   owners = merge->rowmap->range;
4539   len_s  = merge->len_s;
4540 
4541   /* send and recv matrix values */
4542   /*-----------------------------*/
4543   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4544   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4545 
4546   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4547   for (proc=0,k=0; proc<size; proc++) {
4548     if (!len_s[proc]) continue;
4549     i    = owners[proc];
4550     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4551     k++;
4552   }
4553 
4554   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4555   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4556   ierr = PetscFree(status);CHKERRQ(ierr);
4557 
4558   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4559   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4560 
4561   /* insert mat values of mpimat */
4562   /*----------------------------*/
4563   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4564   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4565 
4566   for (k=0; k<merge->nrecv; k++) {
4567     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4568     nrows       = *(buf_ri_k[k]);
4569     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4570     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4571   }
4572 
4573   /* set values of ba */
4574   m = merge->rowmap->n;
4575   for (i=0; i<m; i++) {
4576     arow = owners[rank] + i;
4577     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4578     bnzi = bi[i+1] - bi[i];
4579     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4580 
4581     /* add local non-zero vals of this proc's seqmat into ba */
4582     anzi   = ai[arow+1] - ai[arow];
4583     aj     = a->j + ai[arow];
4584     aa     = a->a + ai[arow];
4585     nextaj = 0;
4586     for (j=0; nextaj<anzi; j++) {
4587       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4588         ba_i[j] += aa[nextaj++];
4589       }
4590     }
4591 
4592     /* add received vals into ba */
4593     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4594       /* i-th row */
4595       if (i == *nextrow[k]) {
4596         anzi   = *(nextai[k]+1) - *nextai[k];
4597         aj     = buf_rj[k] + *(nextai[k]);
4598         aa     = abuf_r[k] + *(nextai[k]);
4599         nextaj = 0;
4600         for (j=0; nextaj<anzi; j++) {
4601           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4602             ba_i[j] += aa[nextaj++];
4603           }
4604         }
4605         nextrow[k]++; nextai[k]++;
4606       }
4607     }
4608     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4609   }
4610   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612 
4613   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4614   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4615   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4616   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4617   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4618   PetscFunctionReturn(0);
4619 }
4620 
4621 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4622 {
4623   PetscErrorCode      ierr;
4624   Mat                 B_mpi;
4625   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4626   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4627   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4628   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4629   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4630   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4631   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4632   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4633   MPI_Status          *status;
4634   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4635   PetscBT             lnkbt;
4636   Mat_Merge_SeqsToMPI *merge;
4637   PetscContainer      container;
4638 
4639   PetscFunctionBegin;
4640   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4641 
4642   /* make sure it is a PETSc comm */
4643   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4644   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4645   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4646 
4647   ierr = PetscNew(&merge);CHKERRQ(ierr);
4648   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4649 
4650   /* determine row ownership */
4651   /*---------------------------------------------------------*/
4652   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4653   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4654   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4655   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4656   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4657   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4658   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4659 
4660   m      = merge->rowmap->n;
4661   owners = merge->rowmap->range;
4662 
4663   /* determine the number of messages to send, their lengths */
4664   /*---------------------------------------------------------*/
4665   len_s = merge->len_s;
4666 
4667   len          = 0; /* length of buf_si[] */
4668   merge->nsend = 0;
4669   for (proc=0; proc<size; proc++) {
4670     len_si[proc] = 0;
4671     if (proc == rank) {
4672       len_s[proc] = 0;
4673     } else {
4674       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4675       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4676     }
4677     if (len_s[proc]) {
4678       merge->nsend++;
4679       nrows = 0;
4680       for (i=owners[proc]; i<owners[proc+1]; i++) {
4681         if (ai[i+1] > ai[i]) nrows++;
4682       }
4683       len_si[proc] = 2*(nrows+1);
4684       len         += len_si[proc];
4685     }
4686   }
4687 
4688   /* determine the number and length of messages to receive for ij-structure */
4689   /*-------------------------------------------------------------------------*/
4690   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4691   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4692 
4693   /* post the Irecv of j-structure */
4694   /*-------------------------------*/
4695   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4696   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4697 
4698   /* post the Isend of j-structure */
4699   /*--------------------------------*/
4700   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4701 
4702   for (proc=0, k=0; proc<size; proc++) {
4703     if (!len_s[proc]) continue;
4704     i    = owners[proc];
4705     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4706     k++;
4707   }
4708 
4709   /* receives and sends of j-structure are complete */
4710   /*------------------------------------------------*/
4711   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4712   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4713 
4714   /* send and recv i-structure */
4715   /*---------------------------*/
4716   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4717   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4718 
4719   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4720   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4721   for (proc=0,k=0; proc<size; proc++) {
4722     if (!len_s[proc]) continue;
4723     /* form outgoing message for i-structure:
4724          buf_si[0]:                 nrows to be sent
4725                [1:nrows]:           row index (global)
4726                [nrows+1:2*nrows+1]: i-structure index
4727     */
4728     /*-------------------------------------------*/
4729     nrows       = len_si[proc]/2 - 1;
4730     buf_si_i    = buf_si + nrows+1;
4731     buf_si[0]   = nrows;
4732     buf_si_i[0] = 0;
4733     nrows       = 0;
4734     for (i=owners[proc]; i<owners[proc+1]; i++) {
4735       anzi = ai[i+1] - ai[i];
4736       if (anzi) {
4737         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4738         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4739         nrows++;
4740       }
4741     }
4742     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4743     k++;
4744     buf_si += len_si[proc];
4745   }
4746 
4747   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4748   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4749 
4750   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4751   for (i=0; i<merge->nrecv; i++) {
4752     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4753   }
4754 
4755   ierr = PetscFree(len_si);CHKERRQ(ierr);
4756   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4757   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4758   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4759   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4760   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4761   ierr = PetscFree(status);CHKERRQ(ierr);
4762 
4763   /* compute a local seq matrix in each processor */
4764   /*----------------------------------------------*/
4765   /* allocate bi array and free space for accumulating nonzero column info */
4766   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4767   bi[0] = 0;
4768 
4769   /* create and initialize a linked list */
4770   nlnk = N+1;
4771   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4772 
4773   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4774   len  = ai[owners[rank+1]] - ai[owners[rank]];
4775   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4776 
4777   current_space = free_space;
4778 
4779   /* determine symbolic info for each local row */
4780   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4781 
4782   for (k=0; k<merge->nrecv; k++) {
4783     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4784     nrows       = *buf_ri_k[k];
4785     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4786     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4787   }
4788 
4789   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4790   len  = 0;
4791   for (i=0; i<m; i++) {
4792     bnzi = 0;
4793     /* add local non-zero cols of this proc's seqmat into lnk */
4794     arow  = owners[rank] + i;
4795     anzi  = ai[arow+1] - ai[arow];
4796     aj    = a->j + ai[arow];
4797     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4798     bnzi += nlnk;
4799     /* add received col data into lnk */
4800     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4801       if (i == *nextrow[k]) { /* i-th row */
4802         anzi  = *(nextai[k]+1) - *nextai[k];
4803         aj    = buf_rj[k] + *nextai[k];
4804         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4805         bnzi += nlnk;
4806         nextrow[k]++; nextai[k]++;
4807       }
4808     }
4809     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4810 
4811     /* if free space is not available, make more free space */
4812     if (current_space->local_remaining<bnzi) {
4813       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4814       nspacedouble++;
4815     }
4816     /* copy data into free space, then initialize lnk */
4817     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4818     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4819 
4820     current_space->array           += bnzi;
4821     current_space->local_used      += bnzi;
4822     current_space->local_remaining -= bnzi;
4823 
4824     bi[i+1] = bi[i] + bnzi;
4825   }
4826 
4827   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4828 
4829   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4830   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4831   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4832 
4833   /* create symbolic parallel matrix B_mpi */
4834   /*---------------------------------------*/
4835   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4836   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4837   if (n==PETSC_DECIDE) {
4838     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4839   } else {
4840     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4841   }
4842   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4843   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4844   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4845   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4846   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4847 
4848   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4849   B_mpi->assembled    = PETSC_FALSE;
4850   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4851   merge->bi           = bi;
4852   merge->bj           = bj;
4853   merge->buf_ri       = buf_ri;
4854   merge->buf_rj       = buf_rj;
4855   merge->coi          = NULL;
4856   merge->coj          = NULL;
4857   merge->owners_co    = NULL;
4858 
4859   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4860 
4861   /* attach the supporting struct to B_mpi for reuse */
4862   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4863   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4864   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4865   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4866   *mpimat = B_mpi;
4867 
4868   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4869   PetscFunctionReturn(0);
4870 }
4871 
4872 /*@C
4873       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4874                  matrices from each processor
4875 
4876     Collective on MPI_Comm
4877 
4878    Input Parameters:
4879 +    comm - the communicator the parallel matrix will live on
4880 .    seqmat - the input sequential matrix (one per process)
4881 .    m - number of local rows (or PETSC_DECIDE)
4882 .    n - number of local columns (or PETSC_DECIDE)
4883 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 
4885    Output Parameter:
4886 .    mpimat - the parallel matrix generated
4887 
4888     Level: advanced
4889 
4890    Notes:
4891      The dimensions of the sequential matrix in each processor MUST be the same.
4892      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4893      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
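
     A sketch of the typical reuse pattern (error checking omitted):

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve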
4894 @*/
4895 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4896 {
4897   PetscErrorCode ierr;
4898   PetscMPIInt    size;
4899 
4900   PetscFunctionBegin;
4901   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4902   if (size == 1) {
4903     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904     if (scall == MAT_INITIAL_MATRIX) {
4905       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4906     } else {
4907       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4908     }
4909     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4910     PetscFunctionReturn(0);
4911   }
4912   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4913   if (scall == MAT_INITIAL_MATRIX) {
4914     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4915   }
4916   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4917   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4918   PetscFunctionReturn(0);
4919 }
4920 
4921 /*@
4922      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4923           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4924           with MatGetSize().
4925 
4926     Not Collective
4927 
4928    Input Parameters:
4929 +    A - the matrix
4930 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4931 
4932    Output Parameter:
4933 .    A_loc - the local sequential matrix generated
4934 
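    A minimal usage sketch (error checking omitted; with MAT_INITIAL_MATRIX the caller is
    expected to destroy A_loc when it is no longer needed):

.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... work with the local rows of A through A_loc ... */
     MatDestroy(&A_loc);
.ve
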
4935     Level: developer
4936 
4937 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4938 
4939 @*/
4940 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4941 {
4942   PetscErrorCode ierr;
4943   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4944   Mat_SeqAIJ     *mat,*a,*b;
4945   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4946   MatScalar      *aa,*ba,*cam;
4947   PetscScalar    *ca;
4948   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4949   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4950   PetscBool      match;
4951   MPI_Comm       comm;
4952   PetscMPIInt    size;
4953 
4954   PetscFunctionBegin;
4955   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4956   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4957   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4958   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4959   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4960 
4961   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4962   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4963   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4964   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4965   aa = a->a; ba = b->a;
4966   if (scall == MAT_INITIAL_MATRIX) {
4967     if (size == 1) {
4968       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4969       PetscFunctionReturn(0);
4970     }
4971 
4972     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4973     ci[0] = 0;
4974     for (i=0; i<am; i++) {
4975       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4976     }
4977     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4978     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4979     k    = 0;
4980     for (i=0; i<am; i++) {
4981       ncols_o = bi[i+1] - bi[i];
4982       ncols_d = ai[i+1] - ai[i];
4983       /* off-diagonal portion of A */
4984       for (jo=0; jo<ncols_o; jo++) {
4985         col = cmap[*bj];
4986         if (col >= cstart) break;
4987         cj[k]   = col; bj++;
4988         ca[k++] = *ba++;
4989       }
4990       /* diagonal portion of A */
4991       for (j=0; j<ncols_d; j++) {
4992         cj[k]   = cstart + *aj++;
4993         ca[k++] = *aa++;
4994       }
4995       /* off-diagonal portion of A */
4996       for (j=jo; j<ncols_o; j++) {
4997         cj[k]   = cmap[*bj++];
4998         ca[k++] = *ba++;
4999       }
5000     }
5001     /* put together the new matrix */
5002     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5003     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5004     /* Since these are PETSc arrays, change flags to free them as necessary. */
5005     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5006     mat->free_a  = PETSC_TRUE;
5007     mat->free_ij = PETSC_TRUE;
5008     mat->nonew   = 0;
5009   } else if (scall == MAT_REUSE_MATRIX) {
5010     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5011     ci = mat->i; cj = mat->j; cam = mat->a;
5012     for (i=0; i<am; i++) {
5013       /* off-diagonal portion of A */
5014       ncols_o = bi[i+1] - bi[i];
5015       for (jo=0; jo<ncols_o; jo++) {
5016         col = cmap[*bj];
5017         if (col >= cstart) break;
5018         *cam++ = *ba++; bj++;
5019       }
5020       /* diagonal portion of A */
5021       ncols_d = ai[i+1] - ai[i];
5022       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5023       /* off-diagonal portion of A */
5024       for (j=jo; j<ncols_o; j++) {
5025         *cam++ = *ba++; bj++;
5026       }
5027     }
5028   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5029   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5030   PetscFunctionReturn(0);
5031 }
5032 
5033 /*@C
5034      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5035 
5036     Not Collective
5037 
5038    Input Parameters:
5039 +    A - the matrix
5040 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5041 -    row, col - index sets of rows and columns to extract (or NULL)
5042 
5043    Output Parameter:
5044 .    A_loc - the local sequential matrix generated
5045 
5046     Level: developer
5047 
5048 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5049 
5050 @*/
5051 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5052 {
5053   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5054   PetscErrorCode ierr;
5055   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5056   IS             isrowa,iscola;
5057   Mat            *aloc;
5058   PetscBool      match;
5059 
5060   PetscFunctionBegin;
5061   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5062   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5063   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5064   if (!row) {
5065     start = A->rmap->rstart; end = A->rmap->rend;
5066     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5067   } else {
5068     isrowa = *row;
5069   }
5070   if (!col) {
5071     start = A->cmap->rstart;
5072     cmap  = a->garray;
5073     nzA   = a->A->cmap->n;
5074     nzB   = a->B->cmap->n;
5075     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5076     ncols = 0;
5077     for (i=0; i<nzB; i++) {
5078       if (cmap[i] < start) idx[ncols++] = cmap[i];
5079       else break;
5080     }
5081     imark = i;
5082     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5083     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5084     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5085   } else {
5086     iscola = *col;
5087   }
5088   if (scall != MAT_INITIAL_MATRIX) {
5089     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5090     aloc[0] = *A_loc;
5091   }
5092   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5093   if (!col) { /* attach global id of condensed columns */
5094     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5095   }
5096   *A_loc = aloc[0];
5097   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5098   if (!row) {
5099     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5100   }
5101   if (!col) {
5102     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5103   }
5104   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5105   PetscFunctionReturn(0);
5106 }
5107 
5108 /*@C
5109     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5110 
5111     Collective on Mat
5112 
5113    Input Parameters:
5114 +    A,B - the matrices in mpiaij format
5115 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5116 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5117 
5118    Output Parameters:
5119 +    rowb, colb - index sets of rows and columns of B to extract
5120 -    B_seq - the sequential matrix generated
5121 
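   Example Usage:
   A minimal sketch (A and B are assumed to be assembled MATMPIAIJ matrices with A's column layout matching B's row layout; the rowb and colb returned here may be passed back with MAT_REUSE_MATRIX):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;

   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ...   use B_seq
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
.ve
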
5122     Level: developer
5123 
5124 @*/
5125 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5126 {
5127   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5128   PetscErrorCode ierr;
5129   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5130   IS             isrowb,iscolb;
5131   Mat            *bseq=NULL;
5132 
5133   PetscFunctionBegin;
5134   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5135     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5136   }
5137   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5138 
5139   if (scall == MAT_INITIAL_MATRIX) {
5140     start = A->cmap->rstart;
5141     cmap  = a->garray;
5142     nzA   = a->A->cmap->n;
5143     nzB   = a->B->cmap->n;
5144     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5145     ncols = 0;
5146     for (i=0; i<nzB; i++) {  /* row < local row index */
5147       if (cmap[i] < start) idx[ncols++] = cmap[i];
5148       else break;
5149     }
5150     imark = i;
5151     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5152     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5153     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5154     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5155   } else {
5156     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5157     isrowb  = *rowb; iscolb = *colb;
5158     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5159     bseq[0] = *B_seq;
5160   }
5161   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5162   *B_seq = bseq[0];
5163   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5164   if (!rowb) {
5165     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5166   } else {
5167     *rowb = isrowb;
5168   }
5169   if (!colb) {
5170     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5171   } else {
5172     *colb = iscolb;
5173   }
5174   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5175   PetscFunctionReturn(0);
5176 }
5177 
5178 /*
5179     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5180     of the OFF-DIAGONAL portion of local A
5181 
5182     Collective on Mat
5183 
5184    Input Parameters:
5185 +    A,B - the matrices in mpiaij format
5186 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5187 
5188    Output Parameters:
5189 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5190 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5191 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5192 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5193 
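   Calling sketch (a minimal illustration only; A and B are assumed to be assembled MATMPIAIJ matrices and the variable names are illustrative):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

   where the second call reuses the communication data saved by the first.
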
5194     Level: developer
5195 
5196 */
5197 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5198 {
5199   VecScatter_MPI_General *gen_to,*gen_from;
5200   PetscErrorCode         ierr;
5201   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5202   Mat_SeqAIJ             *b_oth;
5203   VecScatter             ctx;
5204   MPI_Comm               comm;
5205   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5206   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5207   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5208   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5209   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5210   MPI_Request            *rwaits = NULL,*swaits = NULL;
5211   MPI_Status             *sstatus,rstatus;
5212   PetscMPIInt            jj,size;
5213   VecScatterType         type;
5214   PetscBool              mpi1;
5215 
5216   PetscFunctionBegin;
5217   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5218   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5219 
5220   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5221     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5222   }
5223   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5224   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5225 
5226   if (size == 1) {
5227     startsj_s = NULL;
5228     bufa_ptr  = NULL;
5229     *B_oth    = NULL;
5230     PetscFunctionReturn(0);
5231   }
5232 
5233   ctx = a->Mvctx;
5234   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5235   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5236   if (!mpi1) {
5237     /* a->Mvctx is not of type MPI1, and only MPI1 scatters are supported by the Mat-Mat ops,
5238      thus create a->Mvctx_mpi1 */
5239     if (!a->Mvctx_mpi1) {
5240       a->Mvctx_mpi1_flg = PETSC_TRUE;
5241       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5242     }
5243     ctx = a->Mvctx_mpi1;
5244   }
5245   tag = ((PetscObject)ctx)->tag;
5246 
5247   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5248   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5249   nrecvs   = gen_from->n;
5250   nsends   = gen_to->n;
5251 
5252   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5253   srow    = gen_to->indices;    /* local row index to be sent */
5254   sstarts = gen_to->starts;
5255   sprocs  = gen_to->procs;
5256   sstatus = gen_to->sstatus;
5257   sbs     = gen_to->bs;
5258   rstarts = gen_from->starts;
5259   rprocs  = gen_from->procs;
5260   rbs     = gen_from->bs;
5261 
5262   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5263   if (scall == MAT_INITIAL_MATRIX) {
5264     /* i-array */
5265     /*---------*/
5266     /*  post receives */
5267     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5268     for (i=0; i<nrecvs; i++) {
5269       rowlen = rvalues + rstarts[i]*rbs;
5270       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5271       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5272     }
5273 
5274     /* pack the outgoing message */
5275     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5276 
5277     sstartsj[0] = 0;
5278     rstartsj[0] = 0;
5279     len         = 0; /* total length of j or a array to be sent */
5280     k           = 0;
5281     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5282     for (i=0; i<nsends; i++) {
5283       rowlen = svalues + sstarts[i]*sbs;
5284       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5285       for (j=0; j<nrows; j++) {
5286         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5287         for (l=0; l<sbs; l++) {
5288           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5289 
5290           rowlen[j*sbs+l] = ncols;
5291 
5292           len += ncols;
5293           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5294         }
5295         k++;
5296       }
5297       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5298 
5299       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5300     }
5301     /* recvs and sends of i-array are completed */
5302     i = nrecvs;
5303     while (i--) {
5304       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5305     }
5306     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5307     ierr = PetscFree(svalues);CHKERRQ(ierr);
5308 
5309     /* allocate buffers for sending j and a arrays */
5310     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5311     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5312 
5313     /* create i-array of B_oth */
5314     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5315 
5316     b_othi[0] = 0;
5317     len       = 0; /* total length of j or a array to be received */
5318     k         = 0;
5319     for (i=0; i<nrecvs; i++) {
5320       rowlen = rvalues + rstarts[i]*rbs;
5321       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5322       for (j=0; j<nrows; j++) {
5323         b_othi[k+1] = b_othi[k] + rowlen[j];
5324         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5325         k++;
5326       }
5327       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5328     }
5329     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5330 
5331     /* allocate space for j and a arrays of B_oth */
5332     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5333     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5334 
5335     /* j-array */
5336     /*---------*/
5337     /*  post receives of j-array */
5338     for (i=0; i<nrecvs; i++) {
5339       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5340       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5341     }
5342 
5343     /* pack the outgoing message j-array */
5344     k = 0;
5345     for (i=0; i<nsends; i++) {
5346       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5347       bufJ  = bufj+sstartsj[i];
5348       for (j=0; j<nrows; j++) {
5349         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5350         for (ll=0; ll<sbs; ll++) {
5351           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5352           for (l=0; l<ncols; l++) {
5353             *bufJ++ = cols[l];
5354           }
5355           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5356         }
5357       }
5358       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5359     }
5360 
5361     /* recvs and sends of j-array are completed */
5362     i = nrecvs;
5363     while (i--) {
5364       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5365     }
5366     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5367   } else if (scall == MAT_REUSE_MATRIX) {
5368     sstartsj = *startsj_s;
5369     rstartsj = *startsj_r;
5370     bufa     = *bufa_ptr;
5371     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5372     b_otha   = b_oth->a;
5373   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5374 
5375   /* a-array */
5376   /*---------*/
5377   /*  post receives of a-array */
5378   for (i=0; i<nrecvs; i++) {
5379     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5380     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5381   }
5382 
5383   /* pack the outgoing message a-array */
5384   k = 0;
5385   for (i=0; i<nsends; i++) {
5386     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5387     bufA  = bufa+sstartsj[i];
5388     for (j=0; j<nrows; j++) {
5389       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5390       for (ll=0; ll<sbs; ll++) {
5391         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5392         for (l=0; l<ncols; l++) {
5393           *bufA++ = vals[l];
5394         }
5395         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5396       }
5397     }
5398     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5399   }
5400   /* recvs and sends of a-array are completed */
5401   i = nrecvs;
5402   while (i--) {
5403     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5404   }
5405   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5406   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5407 
5408   if (scall == MAT_INITIAL_MATRIX) {
5409     /* put together the new matrix */
5410     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5411 
5412     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5413     /* Since these are PETSc arrays, change flags to free them as necessary. */
5414     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5415     b_oth->free_a  = PETSC_TRUE;
5416     b_oth->free_ij = PETSC_TRUE;
5417     b_oth->nonew   = 0;
5418 
5419     ierr = PetscFree(bufj);CHKERRQ(ierr);
5420     if (!startsj_s || !bufa_ptr) {
5421       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5422       ierr = PetscFree(bufa);CHKERRQ(ierr);
5423     } else {
5424       *startsj_s = sstartsj;
5425       *startsj_r = rstartsj;
5426       *bufa_ptr  = bufa;
5427     }
5428   }
5429   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5430   PetscFunctionReturn(0);
5431 }
5432 
5433 /*@C
5434   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5435 
5436   Not Collective
5437 
5438   Input Parameter:
5439 . A - The matrix in mpiaij format
5440 
5441   Output Parameters:
5442 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5443 . colmap - A map from global column index to local index into lvec
5444 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5445 
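  Example Usage:
  A minimal sketch (A is assumed to be an assembled MATMPIAIJ matrix; all three output pointers must be valid):
.vb
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  Vec        lvec;
  VecScatter Mvctx;

  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
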
5446   Level: developer
5447 
5448 @*/
5449 #if defined(PETSC_USE_CTABLE)
5450 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5451 #else
5452 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5453 #endif
5454 {
5455   Mat_MPIAIJ *a;
5456 
5457   PetscFunctionBegin;
5458   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5459   PetscValidPointer(lvec, 2);
5460   PetscValidPointer(colmap, 3);
5461   PetscValidPointer(multScatter, 4);
5462   a = (Mat_MPIAIJ*) A->data;
5463   if (lvec) *lvec = a->lvec;
5464   if (colmap) *colmap = a->colmap;
5465   if (multScatter) *multScatter = a->Mvctx;
5466   PetscFunctionReturn(0);
5467 }
5468 
5469 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5470 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5471 #if defined(PETSC_HAVE_MKL_SPARSE)
5472 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5473 #endif
5474 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5475 #if defined(PETSC_HAVE_ELEMENTAL)
5476 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5477 #endif
5478 #if defined(PETSC_HAVE_HYPRE)
5479 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5480 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5481 #endif
5482 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5483 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5484 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5485 
5486 /*
5487     Computes (B'*A')' since computing A*B directly is untenable
5488 
5489                n                       p                          p
5490         (              )       (              )         (                  )
5491       m (      A       )  *  n (       B      )   =   m (         C        )
5492         (              )       (              )         (                  )
5493 
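        i.e. C = A*B is obtained as C = (B^T * A^T)^T, so the product B^T*A^T can use the
        existing MPIAIJ * MPIDense multiplication; the extra cost is the explicit transposes.
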
5494 */
5495 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5496 {
5497   PetscErrorCode ierr;
5498   Mat            At,Bt,Ct;
5499 
5500   PetscFunctionBegin;
5501   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5502   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5503   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5504   ierr = MatDestroy(&At);CHKERRQ(ierr);
5505   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5506   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5507   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5512 {
5513   PetscErrorCode ierr;
5514   PetscInt       m=A->rmap->n,n=B->cmap->n;
5515   Mat            Cmat;
5516 
5517   PetscFunctionBegin;
5518   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5519   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5520   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5521   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5522   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5523   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5524   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5525   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5526 
5527   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5528 
5529   *C = Cmat;
5530   PetscFunctionReturn(0);
5531 }
5532 
5533 /* ----------------------------------------------------------------*/
5534 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5535 {
5536   PetscErrorCode ierr;
5537 
5538   PetscFunctionBegin;
5539   if (scall == MAT_INITIAL_MATRIX) {
5540     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5541     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5542     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5543   }
5544   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5545   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5546   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5547   PetscFunctionReturn(0);
5548 }
5549 
5550 /*MC
5551    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5552 
5553    Options Database Keys:
5554 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5555 
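  Example Usage:
  A minimal sketch (the global size 100 and the preallocation values below are illustrative):
.vb
  Mat A;

  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
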
5556   Level: beginner
5557 
5558 .seealso: MatCreateAIJ()
5559 M*/
5560 
5561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5562 {
5563   Mat_MPIAIJ     *b;
5564   PetscErrorCode ierr;
5565   PetscMPIInt    size;
5566 
5567   PetscFunctionBegin;
5568   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5569 
5570   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5571   B->data       = (void*)b;
5572   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5573   B->assembled  = PETSC_FALSE;
5574   B->insertmode = NOT_SET_VALUES;
5575   b->size       = size;
5576 
5577   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5578 
5579   /* build cache for off-processor entries formed during assembly */
5580   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5581 
5582   b->donotstash  = PETSC_FALSE;
5583   b->colmap      = 0;
5584   b->garray      = 0;
5585   b->roworiented = PETSC_TRUE;
5586 
5587   /* stuff used for matrix vector multiply */
5588   b->lvec  = NULL;
5589   b->Mvctx = NULL;
5590 
5591   /* stuff for MatGetRow() */
5592   b->rowindices   = 0;
5593   b->rowvalues    = 0;
5594   b->getrowactive = PETSC_FALSE;
5595 
5596   /* flexible pointer used in CUSP/CUSPARSE classes */
5597   b->spptr = NULL;
5598 
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5608 #if defined(PETSC_HAVE_MKL_SPARSE)
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5610 #endif
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5613 #if defined(PETSC_HAVE_ELEMENTAL)
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5615 #endif
5616 #if defined(PETSC_HAVE_HYPRE)
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5618 #endif
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5624 #if defined(PETSC_HAVE_HYPRE)
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5626 #endif
5627   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5628   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5629   PetscFunctionReturn(0);
5630 }
5631 
5632 /*@C
5633      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5634          and "off-diagonal" parts of the matrix in CSR format.
5635 
5636    Collective on MPI_Comm
5637 
5638    Input Parameters:
5639 +  comm - MPI communicator
5640 .  m - number of local rows (Cannot be PETSC_DECIDE)
5641 .  n - This value should be the same as the local size used in creating the
5642        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5643        calculated if N is given). For square matrices n is almost always m.
5644 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5645 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5646 .   i - row indices for "diagonal" portion of matrix
5647 .   j - column indices
5648 .   a - matrix values
5649 .   oi - row indices for "off-diagonal" portion of matrix
5650 .   oj - column indices
5651 -   oa - matrix values
5652 
5653    Output Parameter:
5654 .   mat - the matrix
5655 
5656    Level: advanced
5657 
5658    Notes:
5659        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5660        must free the arrays once the matrix has been destroyed and not before.
5661 
5662        The i and j indices are 0 based
5663 
5664        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5665 
5666        This sets local rows and cannot be used to set off-processor values.
5667 
5668        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5669        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5670        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5671        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5672        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5673        communication if it is known that only local entries will be set.
5674 
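   Example Usage:
       A minimal sketch (each process owns one row and one column of a global diagonal matrix; the
       off-diagonal block is empty, and the arrays must remain valid until the matrix is destroyed):
.vb
   PetscInt    i[2]  = {0,1},j[1]  = {0},oi[2] = {0,0},oj[1];
   PetscScalar a[1]  = {2.0},oa[1];
   Mat         A;

   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   ...   use A, then destroy it before the arrays go out of scope
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
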
5675 .keywords: matrix, aij, compressed row, sparse, parallel
5676 
5677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5678           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5679 @*/
5680 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5681 {
5682   PetscErrorCode ierr;
5683   Mat_MPIAIJ     *maij;
5684 
5685   PetscFunctionBegin;
5686   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5687   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5688   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5689   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5690   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5691   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5692   maij = (Mat_MPIAIJ*) (*mat)->data;
5693 
5694   (*mat)->preallocated = PETSC_TRUE;
5695 
5696   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5697   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5698 
5699   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5700   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5701 
5702   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706 
5707   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5708   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5709   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5710   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5711   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5712   PetscFunctionReturn(0);
5713 }
5714 
5715 /*
5716     Special version for direct calls from Fortran
5717 */
5718 #include <petsc/private/fortranimpl.h>
5719 
5720 /* Change these macros so they can be used in a void function */
5721 #undef CHKERRQ
5722 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5723 #undef SETERRQ2
5724 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5725 #undef SETERRQ3
5726 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5727 #undef SETERRQ
5728 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5729 
5730 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5731 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5732 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5733 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5734 #else
5735 #endif
5736 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5737 {
5738   Mat            mat  = *mmat;
5739   PetscInt       m    = *mm, n = *mn;
5740   InsertMode     addv = *maddv;
5741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5742   PetscScalar    value;
5743   PetscErrorCode ierr;
5744 
5745   MatCheckPreallocated(mat,1);
5746   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5747 
5748 #if defined(PETSC_USE_DEBUG)
5749   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5750 #endif
5751   {
5752     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5753     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5754     PetscBool roworiented = aij->roworiented;
5755 
5756     /* Some Variables required in the macro */
5757     Mat        A                 = aij->A;
5758     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5759     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5760     MatScalar  *aa               = a->a;
5761     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5762     Mat        B                 = aij->B;
5763     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5764     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5765     MatScalar  *ba               = b->a;
5766 
5767     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5768     PetscInt  nonew = a->nonew;
5769     MatScalar *ap1,*ap2;
5770 
5771     PetscFunctionBegin;
5772     for (i=0; i<m; i++) {
5773       if (im[i] < 0) continue;
5774 #if defined(PETSC_USE_DEBUG)
5775       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5776 #endif
5777       if (im[i] >= rstart && im[i] < rend) {
5778         row      = im[i] - rstart;
5779         lastcol1 = -1;
5780         rp1      = aj + ai[row];
5781         ap1      = aa + ai[row];
5782         rmax1    = aimax[row];
5783         nrow1    = ailen[row];
5784         low1     = 0;
5785         high1    = nrow1;
5786         lastcol2 = -1;
5787         rp2      = bj + bi[row];
5788         ap2      = ba + bi[row];
5789         rmax2    = bimax[row];
5790         nrow2    = bilen[row];
5791         low2     = 0;
5792         high2    = nrow2;
5793 
5794         for (j=0; j<n; j++) {
5795           if (roworiented) value = v[i*n+j];
5796           else value = v[i+j*m];
5797           if (in[j] >= cstart && in[j] < cend) {
5798             col = in[j] - cstart;
5799             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5800             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5801           } else if (in[j] < 0) continue;
5802 #if defined(PETSC_USE_DEBUG)
5803           /* extra braces around SETERRQ2() are required for --with-errorchecking=0 due to the next 'else' clause */
5804           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5805 #endif
5806           else {
5807             if (mat->was_assembled) {
5808               if (!aij->colmap) {
5809                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5810               }
5811 #if defined(PETSC_USE_CTABLE)
5812               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5813               col--;
5814 #else
5815               col = aij->colmap[in[j]] - 1;
5816 #endif
5817               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5818               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5819                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5820                 col  =  in[j];
5821                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5822                 B     = aij->B;
5823                 b     = (Mat_SeqAIJ*)B->data;
5824                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5825                 rp2   = bj + bi[row];
5826                 ap2   = ba + bi[row];
5827                 rmax2 = bimax[row];
5828                 nrow2 = bilen[row];
5829                 low2  = 0;
5830                 high2 = nrow2;
5831                 bm    = aij->B->rmap->n;
5832                 ba    = b->a;
5833               }
5834             } else col = in[j];
5835             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5836           }
5837         }
5838       } else if (!aij->donotstash) {
5839         if (roworiented) {
5840           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5841         } else {
5842           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5843         }
5844       }
5845     }
5846   }
5847   PetscFunctionReturnVoid();
5848 }
5849 
5850