1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to using inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
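
/*
   Example usage (a minimal sketch, not taken from this file; comm, m, n, M, N, d_nz, d_nnz, o_nz and
   o_nnz are placeholder names):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

   Only the preallocation call that matches the communicator size takes effect; calling both keeps
   the same code working for one or many processes.
*/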
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
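
/*
   Example (a minimal sketch; A and comm are placeholders): after MatCreate() and MatSetSizes(),
   either set the type in code,
     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
   or select it at run time with MatSetFromOptions(A) and the option -mat_type aijcrl.
*/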
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = NULL;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
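  /* accumulate this process's contribution for every global column in a dense work array of
     length n (the global number of columns); the arrays are then combined across processes below */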
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix that lives on process 0 across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
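
/*
   Minimal usage sketch (assumes gmat is a square MATSEQAIJ valid on process 0, m is the number of
   rows the calling process should own, and dmat is a placeholder name):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
   and, after the numerical values of gmat have changed but its nonzero pattern has not,
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/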
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the number of diagonal and off-diagonal entries in each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the number of diagonal and off-diagonal entries in each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0*/
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
405 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
406 holds an integer array of order N) but is fast to access.
407 */
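
/*
   Lookup sketch (mirrors how the map is used later in this file; gcol is a placeholder global column
   index and lcol receives the local off-diagonal column number, or a negative value if gcol is absent):

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/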
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
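/*
   The two macros below insert (or add to) a single value in the diagonal (A) or off-diagonal (B)
   local block: a short bisection narrows the column range, a linear scan locates the column, and
   if the column is absent a new nonzero is created (possibly reallocating the row) unless nonew
   forbids it.  They rely on the local variables set up in MatSetValues_MPIAIJ() below.
*/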
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
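  /* v[] holds the full local row in global column order: entries left of the diagonal block
     (stored in B), then the diagonal-block entries (stored in A), then the remaining entries of B */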
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 
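/*
   Fills in the column indices of the preallocated diagonal (A) and off-diagonal (B) blocks of mat
   from a global CSR description (mat_i, mat_j) and the per-row diagonal/off-diagonal counts
   (dnz, onz); no numerical values are set.
*/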
636 PetscErrorCode MatSetValues_MPIAIJ_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz)
637 {
638   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
639   Mat            A      = aij->A; /* diagonal part of the matrix */
640   Mat            B      = aij->B; /* offdiagonal part of the matrix */
641   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
642   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
643   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
644   PetscInt       *ailen = a->ilen,*aj = a->j;
645   PetscInt       *bilen = b->ilen,*bj = b->j;
646   PetscInt       am     = aij->A->rmap->n,i;
647   PetscInt       col, diag_so_far=0, offd_so_far=0,j,*first_diag_elem;
648   PetscErrorCode ierr;
649 
650   PetscFunctionBegin;
651 
652   /* Allocate memory. first_diag_elem[j] stores the index within row j of the first entry of mat_j that lies in the diagonal block */
653   ierr = PetscMalloc1(am+1, &first_diag_elem);CHKERRQ(ierr);
654 
655   /* Find the first index of mat_j that lies in the diagonal block and store it in first_diag_elem */
656   /* Iterate over all rows of the matrix */
657   for (j=0; j<am; j++) {
658     /* Iterate over all columns, until a diagonal element is found */
659     for (i=0; i<dnz[j]+onz[j]; i++) {
660       col = i + mat_i[j];
661       /* If current element is in the diagonal */
662       if (mat_j[col] >= cstart && mat_j[col] < cend) {
663         first_diag_elem[j] = i;
664         break;
665       }
666     }
667   }
668 
669   /* Set the off-diagonal elements */
670   for (j=0; j<am; j++) {
671     /* left off-diagonal */
672     for (i=0; i<onz[j] && i<first_diag_elem[j]; i++) {
673       col = i + mat_i[j];
674       bj[i+offd_so_far] = mat_j[col];
675     }
676     /* right off-diagonal */
677     for (i=i+dnz[j]; i<dnz[j]+onz[j]; i++) {
678       col = i + mat_i[j];
679       bj[i-dnz[j]+offd_so_far] = mat_j[col];
680     }
681     bilen[j] = onz[j];
682     offd_so_far += onz[j];
683   }
684 
685   /* Set the diagonal elements */
686   for (j=0; j<am; j++) {
687     for (i=0; i<dnz[j]; i++) {
688       col = i + mat_i[j];
689       aj[diag_so_far + i] = mat_j[col+first_diag_elem[j]] - cstart;
690       ailen[j] = dnz[j];
691     }
692     diag_so_far += dnz[j];
693   }
694   ierr = PetscFree(first_diag_elem);CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 
699 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
700 {
701   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
702   PetscErrorCode ierr;
703   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
704   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
705 
706   PetscFunctionBegin;
707   for (i=0; i<m; i++) {
708     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
709     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
710     if (idxm[i] >= rstart && idxm[i] < rend) {
711       row = idxm[i] - rstart;
712       for (j=0; j<n; j++) {
713         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
714         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
715         if (idxn[j] >= cstart && idxn[j] < cend) {
716           col  = idxn[j] - cstart;
717           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
718         } else {
719           if (!aij->colmap) {
720             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
721           }
722 #if defined(PETSC_USE_CTABLE)
723           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
724           col--;
725 #else
726           col = aij->colmap[idxn[j]] - 1;
727 #endif
728           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
729           else {
730             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
731           }
732         }
733       }
734     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
740 
741 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
742 {
743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
744   PetscErrorCode ierr;
745   PetscInt       nstash,reallocs;
746 
747   PetscFunctionBegin;
748   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
749 
750   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
751   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
752   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
757 {
758   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
759   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
760   PetscErrorCode ierr;
761   PetscMPIInt    n;
762   PetscInt       i,j,rstart,ncols,flg;
763   PetscInt       *row,*col;
764   PetscBool      other_disassembled;
765   PetscScalar    *val;
766 
767   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
768 
769   PetscFunctionBegin;
770   if (!aij->donotstash && !mat->nooffprocentries) {
771     while (1) {
772       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
773       if (!flg) break;
774 
775       for (i=0; i<n; ) {
776         /* Now identify the consecutive vals belonging to the same row */
777         for (j=i,rstart=row[j]; j<n; j++) {
778           if (row[j] != rstart) break;
779         }
780         if (j < n) ncols = j-i;
781         else       ncols = n-i;
782         /* Now assemble all these values with a single function call */
783         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
784 
785         i = j;
786       }
787     }
788     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
789   }
790   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
791   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
792 
793   /* determine if any processor has disassembled; if so we must
794      also disassemble ourselves, in order that we may reassemble. */
795   /*
796      if the nonzero structure of the submatrix B cannot change then we know that
797      no processor disassembled, thus we can skip this step
798   */
799   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
800     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
801     if (mat->was_assembled && !other_disassembled) {
802       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
803     }
804   }
805   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
806     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
807   }
808   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
809   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
810   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
811 
812   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
813 
814   aij->rowvalues = 0;
815 
816   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
817   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
818 
819   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
820   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
821     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
822     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
823   }
824   PetscFunctionReturn(0);
825 }
826 
827 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
828 {
829   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
834   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
841   PetscInt      *lrows;
842   PetscInt       r, len;
843   PetscBool      cong;
844   PetscErrorCode ierr;
845 
846   PetscFunctionBegin;
847   /* get locally owned rows */
848   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
849   /* fix right hand side if needed */
850   if (x && b) {
851     const PetscScalar *xx;
852     PetscScalar       *bb;
853 
854     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
855     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
856     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
857     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
858     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
859   }
860   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
861   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
862   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
863   if ((diag != 0.0) && cong) {
864     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
865   } else if (diag != 0.0) {
866     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
867     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
868     for (r = 0; r < len; ++r) {
869       const PetscInt row = lrows[r] + A->rmap->rstart;
870       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
871     }
872     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
874   } else {
875     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
876   }
877   ierr = PetscFree(lrows);CHKERRQ(ierr);
878 
879   /* only change matrix nonzero state if pattern was allowed to be changed */
880   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
881     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
882     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
888 {
889   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
890   PetscErrorCode    ierr;
891   PetscMPIInt       n = A->rmap->n;
892   PetscInt          i,j,r,m,p = 0,len = 0;
893   PetscInt          *lrows,*owners = A->rmap->range;
894   PetscSFNode       *rrows;
895   PetscSF           sf;
896   const PetscScalar *xx;
897   PetscScalar       *bb,*mask;
898   Vec               xmask,lmask;
899   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
900   const PetscInt    *aj, *ii,*ridx;
901   PetscScalar       *aa;
902 
903   PetscFunctionBegin;
904   /* Create SF where leaves are input rows and roots are owned rows */
905   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
906   for (r = 0; r < n; ++r) lrows[r] = -1;
907   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
908   for (r = 0; r < N; ++r) {
909     const PetscInt idx   = rows[r];
910     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
911     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
912       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
913     }
914     rrows[r].rank  = p;
915     rrows[r].index = rows[r] - owners[p];
916   }
917   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
918   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
919   /* Collect flags for rows to be zeroed */
920   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
921   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
922   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
923   /* Compress and put in row numbers */
924   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
925   /* zero diagonal part of matrix */
926   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
927   /* handle off diagonal part of matrix */
928   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
929   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
930   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
931   for (i=0; i<len; i++) bb[lrows[i]] = 1;
932   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
933   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
934   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
936   if (x) {
937     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
938     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
940     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
941   }
942   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
943   /* remove zeroed rows of off diagonal matrix */
944   ii = aij->i;
945   for (i=0; i<len; i++) {
946     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
947   }
948   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
949   if (aij->compressedrow.use) {
950     m    = aij->compressedrow.nrows;
951     ii   = aij->compressedrow.i;
952     ridx = aij->compressedrow.rindex;
953     for (i=0; i<m; i++) {
954       n  = ii[i+1] - ii[i];
955       aj = aij->j + ii[i];
956       aa = aij->a + ii[i];
957 
958       for (j=0; j<n; j++) {
959         if (PetscAbsScalar(mask[*aj])) {
960           if (b) bb[*ridx] -= *aa*xx[*aj];
961           *aa = 0.0;
962         }
963         aa++;
964         aj++;
965       }
966       ridx++;
967     }
968   } else { /* do not use compressed row format */
969     m = l->B->rmap->n;
970     for (i=0; i<m; i++) {
971       n  = ii[i+1] - ii[i];
972       aj = aij->j + ii[i];
973       aa = aij->a + ii[i];
974       for (j=0; j<n; j++) {
975         if (PetscAbsScalar(mask[*aj])) {
976           if (b) bb[i] -= *aa*xx[*aj];
977           *aa = 0.0;
978         }
979         aa++;
980         aj++;
981       }
982     }
983   }
984   if (x) {
985     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
986     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
987   }
988   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
989   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
990   ierr = PetscFree(lrows);CHKERRQ(ierr);
991 
992   /* only change matrix nonzero state if pattern was allowed to be changed */
993   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
994     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
995     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
996   }
997   PetscFunctionReturn(0);
998 }
999 
1000 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1001 {
1002   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1003   PetscErrorCode ierr;
1004   PetscInt       nt;
1005   VecScatter     Mvctx = a->Mvctx;
1006 
1007   PetscFunctionBegin;
1008   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1009   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1010 
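  /* overlap communication with computation: start gathering the ghost values of xx into a->lvec,
     multiply by the diagonal block while the scatter is in flight, then add the off-diagonal product */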
1011   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1012   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1013   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1019 {
1020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1029 {
1030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1031   PetscErrorCode ierr;
1032   VecScatter     Mvctx = a->Mvctx;
1033 
1034   PetscFunctionBegin;
1035   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1036   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1038   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1039   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1044 {
1045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1046   PetscErrorCode ierr;
1047   PetscBool      merged;
1048 
1049   PetscFunctionBegin;
1050   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1051   /* do nondiagonal part */
1052   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1053   if (!merged) {
1054     /* send it on its way */
1055     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056     /* do local part */
1057     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1058     /* receive remote parts: note this assumes the values are not actually */
1059     /* added into yy until the next line */
1060     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1061   } else {
1062     /* do local part */
1063     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1064     /* send it on its way */
1065     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1066     /* values actually were received in the Begin() but we need to call this nop */
1067     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1073 {
1074   MPI_Comm       comm;
1075   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1076   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1077   IS             Me,Notme;
1078   PetscErrorCode ierr;
1079   PetscInt       M,N,first,last,*notme,i;
1080   PetscMPIInt    size;
1081 
1082   PetscFunctionBegin;
1083   /* Easy test: symmetric diagonal block */
1084   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1085   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1086   if (!*f) PetscFunctionReturn(0);
1087   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1088   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1089   if (size == 1) PetscFunctionReturn(0);
1090 
1091   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1092   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1093   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1094   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1095   for (i=0; i<first; i++) notme[i] = i;
1096   for (i=last; i<M; i++) notme[i-last+first] = i;
1097   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1098   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1099   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1100   Aoff = Aoffs[0];
1101   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1102   Boff = Boffs[0];
1103   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1104   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1105   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1106   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1107   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1108   ierr = PetscFree(notme);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1113 {
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   /* do nondiagonal part */
1128   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1129   /* send it on its way */
1130   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1131   /* do local part */
1132   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1133   /* receive remote parts */
1134   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 /*
1139   This only works correctly for square matrices where the subblock A->A is the
1140    diagonal block
1141 */
1142 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1143 {
1144   PetscErrorCode ierr;
1145   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1146 
1147   PetscFunctionBegin;
1148   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1149   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1150   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1155 {
1156   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1161   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1166 {
1167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1168   PetscErrorCode ierr;
1169 
1170   PetscFunctionBegin;
1171 #if defined(PETSC_USE_LOG)
1172   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1173 #endif
1174   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1175   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1176   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1177   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1178 #if defined(PETSC_USE_CTABLE)
1179   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1180 #else
1181   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1182 #endif
1183   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1184   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1185   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1186   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1187   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1188   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1189   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1190 
1191   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1193   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1199   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1200 #if defined(PETSC_HAVE_ELEMENTAL)
1201   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1202 #endif
1203 #if defined(PETSC_HAVE_HYPRE)
1204   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1205   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1206 #endif
1207   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1213 {
1214   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1215   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1216   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1217   PetscErrorCode ierr;
1218   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1219   int            fd;
1220   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1221   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1222   PetscScalar    *column_values;
1223   PetscInt       message_count,flowcontrolcount;
1224   FILE           *file;
1225 
1226   PetscFunctionBegin;
1227   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1228   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1229   nz   = A->nz + B->nz;
1230   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1231   if (!rank) {
1232     header[0] = MAT_FILE_CLASSID;
1233     header[1] = mat->rmap->N;
1234     header[2] = mat->cmap->N;
1235 
1236     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1238     /* get largest number of rows any processor has */
1239     rlen  = mat->rmap->n;
1240     range = mat->rmap->range;
1241     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1242   } else {
1243     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1244     rlen = mat->rmap->n;
1245   }
1246 
1247   /* load up the local row counts */
1248   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1249   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1250 
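  /* rank 0 writes its own data, then receives and writes each other rank's data in turn; the
     PetscViewerFlowControl* calls limit how far ahead of rank 0 the other ranks may send */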
1251   /* store the row lengths to the file */
1252   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1253   if (!rank) {
1254     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1255     for (i=1; i<size; i++) {
1256       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1257       rlen = range[i+1] - range[i];
1258       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1260     }
1261     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1262   } else {
1263     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1264     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1266   }
1267   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1268 
1269   /* load up the local column indices */
1270   nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1271   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1273   cnt   = 0;
1274   for (i=0; i<mat->rmap->n; i++) {
1275     for (j=B->i[i]; j<B->i[i+1]; j++) {
1276       if ((col = garray[B->j[j]]) > cstart) break;
1277       column_indices[cnt++] = col;
1278     }
1279     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1280     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1281   }
1282   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1283 
1284   /* store the column indices to the file */
1285   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1286   if (!rank) {
1287     MPI_Status status;
1288     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1289     for (i=1; i<size; i++) {
1290       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1291       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1292       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1293       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1294       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1295     }
1296     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1297   } else {
1298     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1299     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1300     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1302   }
1303   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1304 
1305   /* load up the local column values */
1306   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1307   cnt  = 0;
1308   for (i=0; i<mat->rmap->n; i++) {
1309     for (j=B->i[i]; j<B->i[i+1]; j++) {
1310       if (garray[B->j[j]] > cstart) break;
1311       column_values[cnt++] = B->a[j];
1312     }
1313     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1314     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1315   }
1316   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1317 
1318   /* store the column values to the file */
1319   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1320   if (!rank) {
1321     MPI_Status status;
1322     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1323     for (i=1; i<size; i++) {
1324       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1325       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1326       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1327       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1328       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1329     }
1330     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1331   } else {
1332     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1333     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1335     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1336   }
1337   ierr = PetscFree(column_values);CHKERRQ(ierr);
1338 
1339   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1340   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1341   PetscFunctionReturn(0);
1342 }
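
/*
   A minimal usage sketch (not part of this file's implementation): storing a parallel AIJ
   matrix with a binary viewer, which reaches MatView_MPIAIJ_Binary() above when the matrix
   lives on more than one process. The file name "matrix.dat" and the helper name are
   illustrative assumptions only.
*/
#if 0
static PetscErrorCode ExampleDumpMPIAIJBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* writes the header, row lengths, column indices, and values as above */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif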
1343 
1344 #include <petscdraw.h>
1345 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1346 {
1347   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1348   PetscErrorCode    ierr;
1349   PetscMPIInt       rank = aij->rank,size = aij->size;
1350   PetscBool         isdraw,iascii,isbinary;
1351   PetscViewer       sviewer;
1352   PetscViewerFormat format;
1353 
1354   PetscFunctionBegin;
1355   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1356   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1357   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1358   if (iascii) {
1359     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1360     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1361       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1362       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1363       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1364       for (i=0; i<(PetscInt)size; i++) {
1365         nmax = PetscMax(nmax,nz[i]);
1366         nmin = PetscMin(nmin,nz[i]);
1367         navg += nz[i];
1368       }
1369       ierr = PetscFree(nz);CHKERRQ(ierr);
1370       navg = navg/size;
1371       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1372       PetscFunctionReturn(0);
1373     }
1374     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1375     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1376       MatInfo   info;
1377       PetscBool inodes;
1378 
1379       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1380       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1381       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1382       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1383       if (!inodes) {
1384         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1385                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1386       } else {
1387         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1388                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1389       }
1390       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1391       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1392       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1393       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1394       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1395       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1396       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1397       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1400       PetscInt inodecount,inodelimit,*inodes;
1401       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1402       if (inodes) {
1403         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1404       } else {
1405         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1406       }
1407       PetscFunctionReturn(0);
1408     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1409       PetscFunctionReturn(0);
1410     }
1411   } else if (isbinary) {
1412     if (size == 1) {
1413       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1414       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1415     } else {
1416       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1417     }
1418     PetscFunctionReturn(0);
1419   } else if (isdraw) {
1420     PetscDraw draw;
1421     PetscBool isnull;
1422     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1423     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1424     if (isnull) PetscFunctionReturn(0);
1425   }
1426 
1427   {
1428     /* assemble the entire matrix onto first processor. */
1429     Mat        A;
1430     Mat_SeqAIJ *Aloc;
1431     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1432     MatScalar  *a;
1433 
1434     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1435     if (!rank) {
1436       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1437     } else {
1438       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1439     }
1440     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1441     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1442     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1443     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1444     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1445 
1446     /* copy over the A part */
1447     Aloc = (Mat_SeqAIJ*)aij->A->data;
1448     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1449     row  = mat->rmap->rstart;
1450     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1451     for (i=0; i<m; i++) {
1452       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1453       row++;
1454       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1455     }
1456     aj = Aloc->j;
1457     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1458 
1459     /* copy over the B part */
1460     Aloc = (Mat_SeqAIJ*)aij->B->data;
1461     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1462     row  = mat->rmap->rstart;
1463     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1464     ct   = cols;
1465     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1466     for (i=0; i<m; i++) {
1467       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1468       row++;
1469       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1470     }
1471     ierr = PetscFree(ct);CHKERRQ(ierr);
1472     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1473     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474     /*
1475        Everyone has to call to draw the matrix since the graphics waits are
1476        synchronized across all processors that share the PetscDraw object
1477     */
1478     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1479     if (!rank) {
1480       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1481       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1482     }
1483     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1484     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1485     ierr = MatDestroy(&A);CHKERRQ(ierr);
1486   }
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1491 {
1492   PetscErrorCode ierr;
1493   PetscBool      iascii,isdraw,issocket,isbinary;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1497   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1498   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1499   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1500   if (iascii || isdraw || isbinary || issocket) {
1501     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1502   }
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1507 {
1508   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1509   PetscErrorCode ierr;
1510   Vec            bb1 = 0;
1511   PetscBool      hasop;
1512 
1513   PetscFunctionBegin;
1514   if (flag == SOR_APPLY_UPPER) {
1515     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1516     PetscFunctionReturn(0);
1517   }
1518 
1519   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* a work vector is needed unless a single sweep with zero initial guess (and no Eisenstat trick) is requested */
1520     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1521   }
1522 
1523   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1524     if (flag & SOR_ZERO_INITIAL_GUESS) {
1525       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1526       its--;
1527     }
1528 
1529     while (its--) {
1530       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1532 
1533       /* update rhs: bb1 = bb - B*x */
1534       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1535       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1536 
1537       /* local sweep */
1538       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1539     }
1540   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1541     if (flag & SOR_ZERO_INITIAL_GUESS) {
1542       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1543       its--;
1544     }
1545     while (its--) {
1546       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548 
1549       /* update rhs: bb1 = bb - B*x */
1550       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1551       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1552 
1553       /* local sweep */
1554       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1555     }
1556   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1557     if (flag & SOR_ZERO_INITIAL_GUESS) {
1558       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1559       its--;
1560     }
1561     while (its--) {
1562       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564 
1565       /* update rhs: bb1 = bb - B*x */
1566       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1567       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1568 
1569       /* local sweep */
1570       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1571     }
1572   } else if (flag & SOR_EISENSTAT) {
1573     Vec xx1;
1574 
1575     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1576     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1577 
1578     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580     if (!mat->diag) {
1581       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1582       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1583     }
1584     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1585     if (hasop) {
1586       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1587     } else {
1588       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1589     }
1590     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1591 
1592     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1593 
1594     /* local sweep */
1595     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1596     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1597     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1598   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1599 
1600   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1601 
1602   matin->factorerrortype = mat->A->factorerrortype;
1603   PetscFunctionReturn(0);
1604 }
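
/*
   A hedged sketch of calling the routine above through the public API. Only the SOR_LOCAL_*
   variants (plus SOR_EISENSTAT and SOR_APPLY_UPPER) are handled in parallel; a true global
   sweep triggers the "Parallel SOR not supported" error above. The omega/shift/iteration
   values below are illustrative assumptions.
*/
#if 0
static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* two outer iterations of a local symmetric sweep, one inner iteration each, omega = 1, no shift */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif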
1605 
1606 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1607 {
1608   Mat            aA,aB,Aperm;
1609   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1610   PetscScalar    *aa,*ba;
1611   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1612   PetscSF        rowsf,sf;
1613   IS             parcolp = NULL;
1614   PetscBool      done;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1619   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1620   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1621   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1622 
1623   /* Invert row permutation to find out where my rows should go */
1624   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1625   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1626   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1627   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1628   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1629   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1630 
1631   /* Invert column permutation to find out where my columns should go */
1632   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1633   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1634   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1635   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1636   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1637   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1638   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1639 
1640   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1643 
1644   /* Find out where my gcols should go */
1645   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1646   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1647   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1648   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1649   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1650   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1651   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1652   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1653 
1654   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1655   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1656   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) {
1658     PetscInt row = rdest[i],rowner;
1659     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1660     for (j=ai[i]; j<ai[i+1]; j++) {
1661       PetscInt cowner,col = cdest[aj[j]];
1662       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1663       if (rowner == cowner) dnnz[i]++;
1664       else onnz[i]++;
1665     }
1666     for (j=bi[i]; j<bi[i+1]; j++) {
1667       PetscInt cowner,col = gcdest[bj[j]];
1668       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1669       if (rowner == cowner) dnnz[i]++;
1670       else onnz[i]++;
1671     }
1672   }
1673   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1674   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1675   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1676   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1677   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1678 
1679   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1680   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1681   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1682   for (i=0; i<m; i++) {
1683     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1684     PetscInt j0,rowlen;
1685     rowlen = ai[i+1] - ai[i];
1686     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so insert the row in batches of at most m entries */
1687       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1688       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1689     }
1690     rowlen = bi[i+1] - bi[i];
1691     for (j0=j=0; j<rowlen; j0=j) {
1692       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1693       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1694     }
1695   }
1696   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1699   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1700   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1701   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1702   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1704   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1705   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1706   *B = Aperm;
1707   PetscFunctionReturn(0);
1708 }
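
/*
   A minimal sketch of the calling sequence for MatPermute() on an MPIAIJ matrix. Each process
   supplies the global destinations of its own rows and columns; here the identity permutation
   is used purely to illustrate the interface (the result equals A). The helper name is an
   illustrative assumption.
*/
#if 0
static PetscErrorCode ExampleIdentityPermute(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,Aperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif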
1709 
1710 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1711 {
1712   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1717   if (ghosts) *ghosts = aij->garray;
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1722 {
1723   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1724   Mat            A    = mat->A,B = mat->B;
1725   PetscErrorCode ierr;
1726   PetscReal      isend[5],irecv[5];
1727 
1728   PetscFunctionBegin;
1729   info->block_size = 1.0;
1730   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1731 
1732   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1733   isend[3] = info->memory;  isend[4] = info->mallocs;
1734 
1735   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1736 
1737   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1738   isend[3] += info->memory;  isend[4] += info->mallocs;
1739   if (flag == MAT_LOCAL) {
1740     info->nz_used      = isend[0];
1741     info->nz_allocated = isend[1];
1742     info->nz_unneeded  = isend[2];
1743     info->memory       = isend[3];
1744     info->mallocs      = isend[4];
1745   } else if (flag == MAT_GLOBAL_MAX) {
1746     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1747 
1748     info->nz_used      = irecv[0];
1749     info->nz_allocated = irecv[1];
1750     info->nz_unneeded  = irecv[2];
1751     info->memory       = irecv[3];
1752     info->mallocs      = irecv[4];
1753   } else if (flag == MAT_GLOBAL_SUM) {
1754     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1755 
1756     info->nz_used      = irecv[0];
1757     info->nz_allocated = irecv[1];
1758     info->nz_unneeded  = irecv[2];
1759     info->memory       = irecv[3];
1760     info->mallocs      = irecv[4];
1761   }
1762   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1763   info->fill_ratio_needed = 0;
1764   info->factor_mallocs    = 0;
1765   PetscFunctionReturn(0);
1766 }
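
/*
   A small usage sketch for the routine above: with MAT_LOCAL only this process's A and B parts
   are counted, with MAT_GLOBAL_SUM the five accumulated quantities are summed across the
   communicator, and with MAT_GLOBAL_MAX the maxima over all processes are returned. The helper
   name is an illustrative assumption.
*/
#if 0
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"global nonzeros used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif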
1767 
1768 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1769 {
1770   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1771   PetscErrorCode ierr;
1772 
1773   PetscFunctionBegin;
1774   switch (op) {
1775   case MAT_NEW_NONZERO_LOCATIONS:
1776   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1777   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1778   case MAT_KEEP_NONZERO_PATTERN:
1779   case MAT_NEW_NONZERO_LOCATION_ERR:
1780   case MAT_USE_INODES:
1781   case MAT_IGNORE_ZERO_ENTRIES:
1782     MatCheckPreallocated(A,1);
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1785     break;
1786   case MAT_ROW_ORIENTED:
1787     MatCheckPreallocated(A,1);
1788     a->roworiented = flg;
1789 
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_NEW_DIAGONALS:
1794     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1795     break;
1796   case MAT_IGNORE_OFF_PROC_ENTRIES:
1797     a->donotstash = flg;
1798     break;
1799   case MAT_SPD:
1800     A->spd_set = PETSC_TRUE;
1801     A->spd     = flg;
1802     if (flg) {
1803       A->symmetric                  = PETSC_TRUE;
1804       A->structurally_symmetric     = PETSC_TRUE;
1805       A->symmetric_set              = PETSC_TRUE;
1806       A->structurally_symmetric_set = PETSC_TRUE;
1807     }
1808     break;
1809   case MAT_SYMMETRIC:
1810     MatCheckPreallocated(A,1);
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_STRUCTURALLY_SYMMETRIC:
1814     MatCheckPreallocated(A,1);
1815     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_HERMITIAN:
1818     MatCheckPreallocated(A,1);
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_SYMMETRY_ETERNAL:
1822     MatCheckPreallocated(A,1);
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_SUBMAT_SINGLEIS:
1826     A->submat_singleis = flg;
1827     break;
1828   case MAT_STRUCTURE_ONLY:
1829     /* The option is handled directly by MatSetOption() */
1830     break;
1831   default:
1832     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1833   }
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1838 {
1839   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1840   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1841   PetscErrorCode ierr;
1842   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1843   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1844   PetscInt       *cmap,*idx_p;
1845 
1846   PetscFunctionBegin;
1847   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1848   mat->getrowactive = PETSC_TRUE;
1849 
1850   if (!mat->rowvalues && (idx || v)) {
1851     /*
1852         allocate enough space to hold information from the longest row.
1853     */
1854     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1855     PetscInt   max = 1,tmp;
1856     for (i=0; i<matin->rmap->n; i++) {
1857       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1858       if (max < tmp) max = tmp;
1859     }
1860     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1861   }
1862 
1863   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1864   lrow = row - rstart;
1865 
1866   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1867   if (!v)   {pvA = 0; pvB = 0;}
1868   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1869   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1870   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1871   nztot = nzA + nzB;
1872 
1873   cmap = mat->garray;
1874   if (v  || idx) {
1875     if (nztot) {
1876       /* Sort by increasing column numbers, assuming A and B already sorted */
1877       PetscInt imark = -1;
1878       if (v) {
1879         *v = v_p = mat->rowvalues;
1880         for (i=0; i<nzB; i++) {
1881           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1882           else break;
1883         }
1884         imark = i;
1885         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1886         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1887       }
1888       if (idx) {
1889         *idx = idx_p = mat->rowindices;
1890         if (imark > -1) {
1891           for (i=0; i<imark; i++) {
1892             idx_p[i] = cmap[cworkB[i]];
1893           }
1894         } else {
1895           for (i=0; i<nzB; i++) {
1896             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1897             else break;
1898           }
1899           imark = i;
1900         }
1901         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1902         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1903       }
1904     } else {
1905       if (idx) *idx = 0;
1906       if (v)   *v   = 0;
1907     }
1908   }
1909   *nz  = nztot;
1910   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1911   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1916 {
1917   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1918 
1919   PetscFunctionBegin;
1920   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1921   aij->getrowactive = PETSC_FALSE;
1922   PetscFunctionReturn(0);
1923 }
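
/*
   A hedged usage sketch for the two routines above: MatGetRow() may only be asked for locally
   owned rows, and each call must be paired with MatRestoreRow() before the next row is
   requested, since a single scratch buffer (mat->rowvalues/rowindices) is reused. The helper
   name is an illustrative assumption.
*/
#if 0
static PetscErrorCode ExampleScanLocalRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... examine the ncols global column indices and values of this row ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif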
1924 
1925 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1926 {
1927   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1928   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1929   PetscErrorCode ierr;
1930   PetscInt       i,j,cstart = mat->cmap->rstart;
1931   PetscReal      sum = 0.0;
1932   MatScalar      *v;
1933 
1934   PetscFunctionBegin;
1935   if (aij->size == 1) {
1936     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1937   } else {
1938     if (type == NORM_FROBENIUS) {
1939       v = amat->a;
1940       for (i=0; i<amat->nz; i++) {
1941         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1942       }
1943       v = bmat->a;
1944       for (i=0; i<bmat->nz; i++) {
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946       }
1947       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1948       *norm = PetscSqrtReal(*norm);
1949       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1950     } else if (type == NORM_1) { /* max column norm */
1951       PetscReal *tmp,*tmp2;
1952       PetscInt  *jj,*garray = aij->garray;
1953       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1954       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1955       *norm = 0.0;
1956       v     = amat->a; jj = amat->j;
1957       for (j=0; j<amat->nz; j++) {
1958         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1959       }
1960       v = bmat->a; jj = bmat->j;
1961       for (j=0; j<bmat->nz; j++) {
1962         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1963       }
1964       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1965       for (j=0; j<mat->cmap->N; j++) {
1966         if (tmp2[j] > *norm) *norm = tmp2[j];
1967       }
1968       ierr = PetscFree(tmp);CHKERRQ(ierr);
1969       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1970       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1971     } else if (type == NORM_INFINITY) { /* max row norm */
1972       PetscReal ntemp = 0.0;
1973       for (j=0; j<aij->A->rmap->n; j++) {
1974         v   = amat->a + amat->i[j];
1975         sum = 0.0;
1976         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1977           sum += PetscAbsScalar(*v); v++;
1978         }
1979         v = bmat->a + bmat->i[j];
1980         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1981           sum += PetscAbsScalar(*v); v++;
1982         }
1983         if (sum > ntemp) ntemp = sum;
1984       }
1985       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1986       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1987     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1993 {
1994   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1995   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1996   PetscErrorCode ierr;
1997   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1998   PetscInt       cstart = A->cmap->rstart,ncol;
1999   Mat            B;
2000   MatScalar      *array;
2001 
2002   PetscFunctionBegin;
2003   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2004   ai = Aloc->i; aj = Aloc->j;
2005   bi = Bloc->i; bj = Bloc->j;
2006   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2007     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2008     PetscSFNode          *oloc;
2009     PETSC_UNUSED PetscSF sf;
2010 
2011     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2012     /* compute d_nnz for preallocation */
2013     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2014     for (i=0; i<ai[ma]; i++) {
2015       d_nnz[aj[i]]++;
2016       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2017     }
2018     /* compute local off-diagonal contributions */
2019     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2021     /* map those to global */
2022     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2023     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2024     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2025     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2026     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2027     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2028     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2029 
2030     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2031     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2032     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2033     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2034     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2035     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2036   } else {
2037     B    = *matout;
2038     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2039     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2040   }
2041 
2042   /* copy over the A part */
2043   array = Aloc->a;
2044   row   = A->rmap->rstart;
2045   for (i=0; i<ma; i++) {
2046     ncol = ai[i+1]-ai[i];
2047     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2048     row++;
2049     array += ncol; aj += ncol;
2050   }
2051   aj = Aloc->j;
2052   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2053 
2054   /* copy over the B part */
2055   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2056   array = Bloc->a;
2057   row   = A->rmap->rstart;
2058   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2059   cols_tmp = cols;
2060   for (i=0; i<mb; i++) {
2061     ncol = bi[i+1]-bi[i];
2062     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2063     row++;
2064     array += ncol; cols_tmp += ncol;
2065   }
2066   ierr = PetscFree(cols);CHKERRQ(ierr);
2067 
2068   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2071     *matout = B;
2072   } else {
2073     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2079 {
2080   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2081   Mat            a    = aij->A,b = aij->B;
2082   PetscErrorCode ierr;
2083   PetscInt       s1,s2,s3;
2084 
2085   PetscFunctionBegin;
2086   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2087   if (rr) {
2088     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2089     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2090     /* Overlap communication with computation. */
2091     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2092   }
2093   if (ll) {
2094     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2095     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2096     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2097   }
2098   /* scale the diagonal block */
2099   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2100 
2101   if (rr) {
2102     /* Do a scatter end and then right scale the off-diagonal block */
2103     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2105   }
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2110 {
2111   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2120 {
2121   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2122   Mat            a,b,c,d;
2123   PetscBool      flg;
2124   PetscErrorCode ierr;
2125 
2126   PetscFunctionBegin;
2127   a = matA->A; b = matA->B;
2128   c = matB->A; d = matB->B;
2129 
2130   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2131   if (flg) {
2132     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2133   }
2134   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2139 {
2140   PetscErrorCode ierr;
2141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2142   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2143 
2144   PetscFunctionBegin;
2145   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2146   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2147     /* because of the column compression in the off-processor part of the matrix a->B,
2148        the number of columns in a->B and b->B may be different, hence we cannot call
2149        the MatCopy() directly on the two parts. If need be, we can provide a more
2150        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2151        then copying the submatrices */
2152     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2153   } else {
2154     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2155     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2156   }
2157   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2162 {
2163   PetscErrorCode ierr;
2164 
2165   PetscFunctionBegin;
2166   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /*
2171    Computes the number of nonzeros per row needed for preallocation when X and Y
2172    have different nonzero structure.
2173 */
2174 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2175 {
2176   PetscInt       i,j,k,nzx,nzy;
2177 
2178   PetscFunctionBegin;
2179   /* Set the number of nonzeros in the new matrix */
2180   for (i=0; i<m; i++) {
2181     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2182     nzx = xi[i+1] - xi[i];
2183     nzy = yi[i+1] - yi[i];
2184     nnz[i] = 0;
2185     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2186       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2187       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2188       nnz[i]++;
2189     }
2190     for (; k<nzy; k++) nnz[i]++;
2191   }
2192   PetscFunctionReturn(0);
2193 }
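
/*
   Worked example of the merge count above (an illustration, not code): if row i of X has global
   columns {1,4,7} and row i of Y has {2,4}, the merged pattern is {1,2,4,7}, so nnz[i] = 4; the
   duplicate column 4 is counted only once because the "Skip duplicate" branch advances k without
   incrementing nnz[i] again.
*/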
2194 
2195 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2196 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2197 {
2198   PetscErrorCode ierr;
2199   PetscInt       m = Y->rmap->N;
2200   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2201   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2202 
2203   PetscFunctionBegin;
2204   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2209 {
2210   PetscErrorCode ierr;
2211   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2212   PetscBLASInt   bnz,one=1;
2213   Mat_SeqAIJ     *x,*y;
2214 
2215   PetscFunctionBegin;
2216   if (str == SAME_NONZERO_PATTERN) {
2217     PetscScalar alpha = a;
2218     x    = (Mat_SeqAIJ*)xx->A->data;
2219     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2220     y    = (Mat_SeqAIJ*)yy->A->data;
2221     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2222     x    = (Mat_SeqAIJ*)xx->B->data;
2223     y    = (Mat_SeqAIJ*)yy->B->data;
2224     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2225     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2226     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2227   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2228     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2229   } else {
2230     Mat      B;
2231     PetscInt *nnz_d,*nnz_o;
2232     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2233     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2234     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2235     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2236     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2237     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2238     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2239     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2240     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2241     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2242     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2243     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2244     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2245     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2246   }
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2251 
2252 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2253 {
2254 #if defined(PETSC_USE_COMPLEX)
2255   PetscErrorCode ierr;
2256   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2257 
2258   PetscFunctionBegin;
2259   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2260   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2261 #else
2262   PetscFunctionBegin;
2263 #endif
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2268 {
2269   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2270   PetscErrorCode ierr;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2274   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2279 {
2280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2285   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2290 {
2291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2292   PetscErrorCode ierr;
2293   PetscInt       i,*idxb = 0;
2294   PetscScalar    *va,*vb;
2295   Vec            vtmp;
2296 
2297   PetscFunctionBegin;
2298   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2299   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2300   if (idx) {
2301     for (i=0; i<A->rmap->n; i++) {
2302       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2303     }
2304   }
2305 
2306   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2307   if (idx) {
2308     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2309   }
2310   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2311   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2312 
2313   for (i=0; i<A->rmap->n; i++) {
2314     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2315       va[i] = vb[i];
2316       if (idx) idx[i] = a->garray[idxb[i]];
2317     }
2318   }
2319 
2320   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2321   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2322   ierr = PetscFree(idxb);CHKERRQ(ierr);
2323   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2328 {
2329   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2330   PetscErrorCode ierr;
2331   PetscInt       i,*idxb = 0;
2332   PetscScalar    *va,*vb;
2333   Vec            vtmp;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2337   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2338   if (idx) {
2339     for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2340       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2341     }
2342   }
2343 
2344   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2345   if (idx) {
2346     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2347   }
2348   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2349   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2350 
2351   for (i=0; i<A->rmap->n; i++) {
2352     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2353       va[i] = vb[i];
2354       if (idx) idx[i] = a->garray[idxb[i]];
2355     }
2356   }
2357 
2358   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2359   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2360   ierr = PetscFree(idxb);CHKERRQ(ierr);
2361   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2366 {
2367   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2368   PetscInt       n      = A->rmap->n;
2369   PetscInt       cstart = A->cmap->rstart;
2370   PetscInt       *cmap  = mat->garray;
2371   PetscInt       *diagIdx, *offdiagIdx;
2372   Vec            diagV, offdiagV;
2373   PetscScalar    *a, *diagA, *offdiagA;
2374   PetscInt       r;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2379   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2381   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2383   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2384   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2385   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2386   for (r = 0; r < n; ++r) {
2387     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2388       a[r]   = diagA[r];
2389       idx[r] = cstart + diagIdx[r];
2390     } else {
2391       a[r]   = offdiagA[r];
2392       idx[r] = cmap[offdiagIdx[r]];
2393     }
2394   }
2395   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2396   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2398   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2399   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2400   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2405 {
2406   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2407   PetscInt       n      = A->rmap->n;
2408   PetscInt       cstart = A->cmap->rstart;
2409   PetscInt       *cmap  = mat->garray;
2410   PetscInt       *diagIdx, *offdiagIdx;
2411   Vec            diagV, offdiagV;
2412   PetscScalar    *a, *diagA, *offdiagA;
2413   PetscInt       r;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2418   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2419   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2420   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2421   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2422   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2423   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2424   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2425   for (r = 0; r < n; ++r) {
2426     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2427       a[r]   = diagA[r];
2428       idx[r] = cstart + diagIdx[r];
2429     } else {
2430       a[r]   = offdiagA[r];
2431       idx[r] = cmap[offdiagIdx[r]];
2432     }
2433   }
2434   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2436   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2437   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2438   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2439   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2444 {
2445   PetscErrorCode ierr;
2446   Mat            *dummy;
2447 
2448   PetscFunctionBegin;
2449   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2450   *newmat = *dummy;
2451   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2456 {
2457   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2458   PetscErrorCode ierr;
2459 
2460   PetscFunctionBegin;
2461   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2462   A->factorerrortype = a->A->factorerrortype;
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2467 {
2468   PetscErrorCode ierr;
2469   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2470 
2471   PetscFunctionBegin;
2472   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2473   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2474   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2475   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2480 {
2481   PetscFunctionBegin;
2482   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2483   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 /*@
2488    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2489 
2490    Collective on Mat
2491 
2492    Input Parameters:
2493 +    A - the matrix
2494 -    sc - PETSC_TRUE to use the scalable algorithm (by default the non-scalable algorithm is used)
2495 
2496  Level: advanced
2497 
2498 @*/
2499 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2500 {
2501   PetscErrorCode       ierr;
2502 
2503   PetscFunctionBegin;
2504   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2505   PetscFunctionReturn(0);
2506 }
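
/*
   A minimal sketch of how a caller might select the scalable overlap algorithm before growing
   index sets; equivalently the command-line option -mat_increase_overlap_scalable (registered
   in MatSetFromOptions_MPIAIJ() below) can be used. The overlap width of 2 and the helper name
   are illustrative assumptions.
*/
#if 0
static PetscErrorCode ExampleScalableOverlap(Mat A,IS *is)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatIncreaseOverlap(A,1,is,2);CHKERRQ(ierr); /* grow the single index set by 2 levels of overlap */
  PetscFunctionReturn(0);
}
#endif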
2507 
2508 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2509 {
2510   PetscErrorCode       ierr;
2511   PetscBool            sc = PETSC_FALSE,flg;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2516   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2517   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2518   if (flg) {
2519     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2520   }
2521   ierr = PetscOptionsTail();CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2526 {
2527   PetscErrorCode ierr;
2528   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2529   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2530 
2531   PetscFunctionBegin;
2532   if (!Y->preallocated) {
2533     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2534   } else if (!aij->nz) {
2535     PetscInt nonew = aij->nonew;
2536     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2537     aij->nonew = nonew;
2538   }
2539   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2544 {
2545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2546   PetscErrorCode ierr;
2547 
2548   PetscFunctionBegin;
2549   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2550   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2551   if (d) {
2552     PetscInt rstart;
2553     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2554     *d += rstart;
2555 
2556   }
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 
2561 /* -------------------------------------------------------------------*/
2562 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2563                                        MatGetRow_MPIAIJ,
2564                                        MatRestoreRow_MPIAIJ,
2565                                        MatMult_MPIAIJ,
2566                                 /* 4*/ MatMultAdd_MPIAIJ,
2567                                        MatMultTranspose_MPIAIJ,
2568                                        MatMultTransposeAdd_MPIAIJ,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                 /*10*/ 0,
2573                                        0,
2574                                        0,
2575                                        MatSOR_MPIAIJ,
2576                                        MatTranspose_MPIAIJ,
2577                                 /*15*/ MatGetInfo_MPIAIJ,
2578                                        MatEqual_MPIAIJ,
2579                                        MatGetDiagonal_MPIAIJ,
2580                                        MatDiagonalScale_MPIAIJ,
2581                                        MatNorm_MPIAIJ,
2582                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2583                                        MatAssemblyEnd_MPIAIJ,
2584                                        MatSetOption_MPIAIJ,
2585                                        MatZeroEntries_MPIAIJ,
2586                                 /*24*/ MatZeroRows_MPIAIJ,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                 /*29*/ MatSetUp_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        MatGetDiagonalBlock_MPIAIJ,
2595                                        0,
2596                                 /*34*/ MatDuplicate_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*39*/ MatAXPY_MPIAIJ,
2602                                        MatCreateSubMatrices_MPIAIJ,
2603                                        MatIncreaseOverlap_MPIAIJ,
2604                                        MatGetValues_MPIAIJ,
2605                                        MatCopy_MPIAIJ,
2606                                 /*44*/ MatGetRowMax_MPIAIJ,
2607                                        MatScale_MPIAIJ,
2608                                        MatShift_MPIAIJ,
2609                                        MatDiagonalSet_MPIAIJ,
2610                                        MatZeroRowsColumns_MPIAIJ,
2611                                 /*49*/ MatSetRandom_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2617                                        0,
2618                                        MatSetUnfactored_MPIAIJ,
2619                                        MatPermute_MPIAIJ,
2620                                        0,
2621                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2622                                        MatDestroy_MPIAIJ,
2623                                        MatView_MPIAIJ,
2624                                        0,
2625                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2626                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2627                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2632                                        MatGetRowMinAbs_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                 /*75*/ MatFDColoringApply_AIJ,
2638                                        MatSetFromOptions_MPIAIJ,
2639                                        0,
2640                                        0,
2641                                        MatFindZeroDiagonals_MPIAIJ,
2642                                 /*80*/ 0,
2643                                        0,
2644                                        0,
2645                                 /*83*/ MatLoad_MPIAIJ,
2646                                        MatIsSymmetric_MPIAIJ,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2652                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2653                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2654                                        MatPtAP_MPIAIJ_MPIAIJ,
2655                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2656                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*99*/ 0,
2662                                        0,
2663                                        0,
2664                                        MatConjugate_MPIAIJ,
2665                                        0,
2666                                 /*104*/MatSetValuesRow_MPIAIJ,
2667                                        MatRealPart_MPIAIJ,
2668                                        MatImaginaryPart_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                 /*109*/0,
2672                                        0,
2673                                        MatGetRowMin_MPIAIJ,
2674                                        0,
2675                                        MatMissingDiagonal_MPIAIJ,
2676                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2677                                        0,
2678                                        MatGetGhosts_MPIAIJ,
2679                                        0,
2680                                        0,
2681                                 /*119*/0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        MatGetMultiProcBlock_MPIAIJ,
2686                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2687                                        MatGetColumnNorms_MPIAIJ,
2688                                        MatInvertBlockDiagonal_MPIAIJ,
2689                                        0,
2690                                        MatCreateSubMatricesMPI_MPIAIJ,
2691                                 /*129*/0,
2692                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2693                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2694                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2695                                        0,
2696                                 /*134*/0,
2697                                        0,
2698                                        MatRARt_MPIAIJ_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                 /*139*/MatSetBlockSizes_MPIAIJ,
2702                                        0,
2703                                        0,
2704                                        MatFDColoringSetUp_MPIXAIJ,
2705                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2706                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2707 };
2708 
2709 /* ----------------------------------------------------------------------------------------*/
2710 
2711 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2712 {
2713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2718   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2723 {
2724   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2729   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2734 {
2735   Mat_MPIAIJ     *b;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2740   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2741   b = (Mat_MPIAIJ*)B->data;
2742 
2743 #if defined(PETSC_USE_CTABLE)
2744   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2745 #else
2746   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2747 #endif
2748   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2749   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2750   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2751 
2752   /* Because the B will have been resized we simply destroy it and create a new one each time */
2753   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2754   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2755   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2756   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2757   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2758   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2759 
2760   if (!B->preallocated) {
2761     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2762     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2763     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2764     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2765     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2766   }
2767 
2768   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2769   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2770   B->preallocated  = PETSC_TRUE;
2771   B->was_assembled = PETSC_FALSE;
2772   B->assembled     = PETSC_FALSE;
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2777 {
2778   Mat_MPIAIJ     *b;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2783   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2784   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2785   b = (Mat_MPIAIJ*)B->data;
2786 
2787 #if defined(PETSC_USE_CTABLE)
2788   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2789 #else
2790   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2791 #endif
2792   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2793   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2794   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2795 
2796   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2797   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2798   B->preallocated  = PETSC_TRUE;
2799   B->was_assembled = PETSC_FALSE;
2800   B->assembled = PETSC_FALSE;
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2805 {
2806   Mat            mat;
2807   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   *newmat = 0;
2812   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2813   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2814   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2815   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2816   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2817   a       = (Mat_MPIAIJ*)mat->data;
2818 
2819   mat->factortype   = matin->factortype;
2820   mat->assembled    = PETSC_TRUE;
2821   mat->insertmode   = NOT_SET_VALUES;
2822   mat->preallocated = PETSC_TRUE;
2823 
2824   a->size         = oldmat->size;
2825   a->rank         = oldmat->rank;
2826   a->donotstash   = oldmat->donotstash;
2827   a->roworiented  = oldmat->roworiented;
2828   a->rowindices   = 0;
2829   a->rowvalues    = 0;
2830   a->getrowactive = PETSC_FALSE;
2831 
2832   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2834 
2835   if (oldmat->colmap) {
2836 #if defined(PETSC_USE_CTABLE)
2837     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2838 #else
2839     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2840     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2841     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842 #endif
2843   } else a->colmap = 0;
2844   if (oldmat->garray) {
2845     PetscInt len;
2846     len  = oldmat->B->cmap->n;
2847     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2848     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2849     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2850   } else a->garray = 0;
2851 
2852   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2853   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2854   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2855   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2856 
2857   if (oldmat->Mvctx_mpi1) {
2858     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2859     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2860   }
2861 
2862   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2863   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2864   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2865   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2866   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2867   *newmat = mat;
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2872 {
2873   PetscScalar    *vals,*svals;
2874   MPI_Comm       comm;
2875   PetscErrorCode ierr;
2876   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2877   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2878   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2879   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2880   PetscInt       cend,cstart,n,*rowners;
2881   int            fd;
2882   PetscInt       bs = newMat->rmap->bs;
2883 
2884   PetscFunctionBegin;
2885   /* force binary viewer to load .info file if it has not yet done so */
2886   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2887   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2889   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2890   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2891   if (!rank) {
2892     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2893     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2894     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2895   }
2896 
2897   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2898   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2899   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2900   if (bs < 0) bs = 1;
2901 
2902   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2903   M    = header[1]; N = header[2];
2904 
2905   /* If global sizes are set, check if they are consistent with that given in the file */
2906   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2907   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2908 
2909   /* determine ownership of all (block) rows */
2910   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2911   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2912   else m = newMat->rmap->n; /* Set by user */
2913 
2914   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2915   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2916 
2917   /* First process needs enough room for process with most rows */
2918   if (!rank) {
2919     mmax = rowners[1];
2920     for (i=2; i<=size; i++) {
2921       mmax = PetscMax(mmax, rowners[i]);
2922     }
2923   } else mmax = -1;             /* unused, but compilers complain */
2924 
2925   rowners[0] = 0;
2926   for (i=2; i<=size; i++) {
2927     rowners[i] += rowners[i-1];
2928   }
2929   rstart = rowners[rank];
2930   rend   = rowners[rank+1];
2931 
2932   /* distribute row lengths to all processors */
2933   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2934   if (!rank) {
2935     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2936     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2937     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2938     for (j=0; j<m; j++) {
2939       procsnz[0] += ourlens[j];
2940     }
2941     for (i=1; i<size; i++) {
2942       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2943       /* calculate the number of nonzeros on each processor */
2944       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2945         procsnz[i] += rowlengths[j];
2946       }
2947       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2950   } else {
2951     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2952   }
2953 
2954   if (!rank) {
2955     /* determine max buffer needed and allocate it */
2956     maxnz = 0;
2957     for (i=0; i<size; i++) {
2958       maxnz = PetscMax(maxnz,procsnz[i]);
2959     }
2960     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2961 
2962     /* read in my part of the matrix column indices  */
2963     nz   = procsnz[0];
2964     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2965     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2966 
2967     /* read in everyone else's part and ship it off */
2968     for (i=1; i<size; i++) {
2969       nz   = procsnz[i];
2970       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2971       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2972     }
2973     ierr = PetscFree(cols);CHKERRQ(ierr);
2974   } else {
2975     /* determine buffer space needed for message */
2976     nz = 0;
2977     for (i=0; i<m; i++) {
2978       nz += ourlens[i];
2979     }
2980     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2981 
2982     /* receive message of column indices */
2983     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2984   }
2985 
2986   /* determine column ownership if matrix is not square */
2987   if (N != M) {
2988     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2989     else n = newMat->cmap->n;
2990     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2991     cstart = cend - n;
2992   } else {
2993     cstart = rstart;
2994     cend   = rend;
2995     n      = cend - cstart;
2996   }
2997 
2998   /* loop over local rows, determining number of off diagonal entries */
2999   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3000   jj   = 0;
3001   for (i=0; i<m; i++) {
3002     for (j=0; j<ourlens[i]; j++) {
3003       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3004       jj++;
3005     }
3006   }
3007 
3008   for (i=0; i<m; i++) {
3009     ourlens[i] -= offlens[i];
3010   }
3011   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3012 
3013   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3014 
3015   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3016 
3017   for (i=0; i<m; i++) {
3018     ourlens[i] += offlens[i];
3019   }
3020 
3021   if (!rank) {
3022     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3023 
3024     /* read in my part of the matrix numerical values  */
3025     nz   = procsnz[0];
3026     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3027 
3028     /* insert into matrix */
3029     jj      = rstart;
3030     smycols = mycols;
3031     svals   = vals;
3032     for (i=0; i<m; i++) {
3033       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3034       smycols += ourlens[i];
3035       svals   += ourlens[i];
3036       jj++;
3037     }
3038 
3039     /* read in other processors and ship out */
3040     for (i=1; i<size; i++) {
3041       nz   = procsnz[i];
3042       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3043       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3044     }
3045     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3046   } else {
3047     /* receive numeric values */
3048     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3049 
3050     /* receive message of values */
3051     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3052 
3053     /* insert into matrix */
3054     jj      = rstart;
3055     smycols = mycols;
3056     svals   = vals;
3057     for (i=0; i<m; i++) {
3058       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3059       smycols += ourlens[i];
3060       svals   += ourlens[i];
3061       jj++;
3062     }
3063   }
3064   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3065   ierr = PetscFree(vals);CHKERRQ(ierr);
3066   ierr = PetscFree(mycols);CHKERRQ(ierr);
3067   ierr = PetscFree(rowners);CHKERRQ(ierr);
3068   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3069   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 /* Not scalable because of ISAllGather() unless getting all columns. */
3074 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3075 {
3076   PetscErrorCode ierr;
3077   IS             iscol_local;
3078   PetscBool      isstride;
3079   PetscMPIInt    lisstride=0,gisstride;
3080 
3081   PetscFunctionBegin;
3082   /* check if we are grabbing all columns */
3083   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3084 
3085   if (isstride) {
3086     PetscInt  start,len,mstart,mlen;
3087     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3088     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3089     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3090     if (mstart == start && mlen-mstart == len) lisstride = 1;
3091   }
3092 
3093   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3094   if (gisstride) {
3095     PetscInt N;
3096     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3097     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3098     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3099     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3100   } else {
3101     PetscInt cbs;
3102     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3103     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3104     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3105   }
3106 
3107   *isseq = iscol_local;
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 /*
3112  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3113  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3114 
3115  Input Parameters:
3116    mat - matrix
3117    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3118            i.e., mat->rstart <= isrow[i] < mat->rend
3119    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3120            i.e., mat->cstart <= iscol[i] < mat->cend
3121  Output Parameter:
3122    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3123    iscol_o - sequential column index set for retrieving mat->B
3124    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
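
 A small hypothetical illustration (not taken from an actual run): suppose this process owns global columns 4..7 of mat
 and its portion of iscol selects global columns {5,6}; then iscol_d = {1,2}, the local positions of columns 5 and 6 in
 the diagonal block mat->A.  If some other process selects global column 9 in its portion of iscol and column 9 appears
 as a column of this process's off-diagonal block mat->B, then iscol_o contains the local B-column index of column 9 and
 the corresponding garray entry records the position of column 9 within the globally concatenated iscol.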
3125  */
3126 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3127 {
3128   PetscErrorCode ierr;
3129   Vec            x,cmap;
3130   const PetscInt *is_idx;
3131   PetscScalar    *xarray,*cmaparray;
3132   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3133   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3134   Mat            B=a->B;
3135   Vec            lvec=a->lvec,lcmap;
3136   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3137   MPI_Comm       comm;
3138   VecScatter     Mvctx=a->Mvctx;
3139 
3140   PetscFunctionBegin;
3141   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3142   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3143 
3144   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3145   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3146   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3147   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3148   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3149 
3150   /* Get start indices */
3151   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3152   isstart -= ncols;
3153   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3154 
3155   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3156   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3157   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3158   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3159   for (i=0; i<ncols; i++) {
3160     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3161     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3162     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3163   }
3164   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3165   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3166   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3167 
3168   /* Get iscol_d */
3169   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3170   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3171   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3172 
3173   /* Get isrow_d */
3174   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3175   rstart = mat->rmap->rstart;
3176   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3177   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3178   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3179   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3180 
3181   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3182   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3183   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3184 
3185   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3186   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3187   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3188 
3189   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3190 
3191   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3192   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3193 
3194   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3195   /* off-process column indices */
3196   count = 0;
3197   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3198   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3199 
3200   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3201   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3202   for (i=0; i<Bn; i++) {
3203     if (PetscRealPart(xarray[i]) > -1.0) {
3204       idx[count]     = i;                   /* local column index in off-diagonal part B */
3205       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3206       count++;
3207     }
3208   }
3209   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3210   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3211 
3212   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3213   /* cannot ensure iscol_o has same blocksize as iscol! */
3214 
3215   ierr = PetscFree(idx);CHKERRQ(ierr);
3216   *garray = cmap1;
3217 
3218   ierr = VecDestroy(&x);CHKERRQ(ierr);
3219   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3220   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3225 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3226 {
3227   PetscErrorCode ierr;
3228   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3229   Mat            M = NULL;
3230   MPI_Comm       comm;
3231   IS             iscol_d,isrow_d,iscol_o;
3232   Mat            Asub = NULL,Bsub = NULL;
3233   PetscInt       n;
3234 
3235   PetscFunctionBegin;
3236   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3237 
3238   if (call == MAT_REUSE_MATRIX) {
3239     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3240     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3241     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3242 
3243     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3244     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3245 
3246     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3247     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3248 
3249     /* Update diagonal and off-diagonal portions of submat */
3250     asub = (Mat_MPIAIJ*)(*submat)->data;
3251     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3252     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3253     if (n) {
3254       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3255     }
3256     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3257     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3258 
3259   } else { /* call == MAT_INITIAL_MATRIX) */
3260     const PetscInt *garray;
3261     PetscInt        BsubN;
3262 
3263     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3264     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3265 
3266     /* Create local submatrices Asub and Bsub */
3267     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3268     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3269 
3270     /* Create submatrix M */
3271     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3272 
3273     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3274     asub = (Mat_MPIAIJ*)M->data;
3275 
3276     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3277     n = asub->B->cmap->N;
3278     if (BsubN > n) {
3279       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3280       const PetscInt *idx;
3281       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3282       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3283 
3284       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3285       j = 0;
3286       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3287       for (i=0; i<n; i++) {
3288         if (j >= BsubN) break;
3289         while (subgarray[i] > garray[j]) j++;
3290 
3291         if (subgarray[i] == garray[j]) {
3292           idx_new[i] = idx[j++];
3293         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3294       }
3295       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3296 
3297       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3298       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3299 
3300     } else if (BsubN < n) {
3301       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3302     }
3303 
3304     ierr = PetscFree(garray);CHKERRQ(ierr);
3305     *submat = M;
3306 
3307     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3308     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3309     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3310 
3311     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3312     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3313 
3314     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3315     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3316   }
3317   PetscFunctionReturn(0);
3318 }
3319 
3320 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3321 {
3322   PetscErrorCode ierr;
3323   IS             iscol_local=NULL,isrow_d;
3324   PetscInt       csize;
3325   PetscInt       n,i,j,start,end;
3326   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3327   MPI_Comm       comm;
3328 
3329   PetscFunctionBegin;
3330   /* If isrow has same processor distribution as mat,
3331      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3332   if (call == MAT_REUSE_MATRIX) {
3333     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3334     if (isrow_d) {
3335       sameRowDist  = PETSC_TRUE;
3336       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3337     } else {
3338       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3339       if (iscol_local) {
3340         sameRowDist  = PETSC_TRUE;
3341         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3342       }
3343     }
3344   } else {
3345     /* Check if isrow has same processor distribution as mat */
3346     sameDist[0] = PETSC_FALSE;
3347     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3348     if (!n) {
3349       sameDist[0] = PETSC_TRUE;
3350     } else {
3351       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3352       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3353       if (i >= start && j < end) {
3354         sameDist[0] = PETSC_TRUE;
3355       }
3356     }
3357 
3358     /* Check if iscol has same processor distribution as mat */
3359     sameDist[1] = PETSC_FALSE;
3360     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3361     if (!n) {
3362       sameDist[1] = PETSC_TRUE;
3363     } else {
3364       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3365       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3366       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3367     }
3368 
3369     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3370     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3371     sameRowDist = tsameDist[0];
3372   }
3373 
3374   if (sameRowDist) {
3375     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3376       /* isrow and iscol have same processor distribution as mat */
3377       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3378       PetscFunctionReturn(0);
3379     } else { /* sameRowDist */
3380       /* isrow has same processor distribution as mat */
3381       if (call == MAT_INITIAL_MATRIX) {
3382         PetscBool sorted;
3383         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3384         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3385         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3386         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3387 
3388         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3389         if (sorted) {
3390           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3391           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3392           PetscFunctionReturn(0);
3393         }
3394       } else { /* call == MAT_REUSE_MATRIX */
3395         IS    iscol_sub;
3396         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3397         if (iscol_sub) {
3398           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3399           PetscFunctionReturn(0);
3400         }
3401       }
3402     }
3403   }
3404 
3405   /* General case: iscol -> iscol_local which has global size of iscol */
3406   if (call == MAT_REUSE_MATRIX) {
3407     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3408     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3409   } else {
3410     if (!iscol_local) {
3411       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3412     }
3413   }
3414 
3415   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3416   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3417 
3418   if (call == MAT_INITIAL_MATRIX) {
3419     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3420     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3421   }
3422   PetscFunctionReturn(0);
3423 }
3424 
3425 /*@C
3426      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3427          and "off-diagonal" part of the matrix in CSR format.
3428 
3429    Collective on MPI_Comm
3430 
3431    Input Parameters:
3432 +  comm - MPI communicator
3433 .  A - "diagonal" portion of matrix
3434 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3435 -  garray - global index of B columns
3436 
3437    Output Parameter:
3438 .   mat - the matrix, with input A as its local diagonal matrix
3439    Level: advanced
3440 
3441    Notes:
3442        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3443        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
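
       A minimal calling sketch (a hypothetical fragment, not taken from a PETSc example; Aloc and Bloc are SeqAIJ matrices
       already built on each process, garray maps the columns of Bloc to global column indices, and error checking is omitted):
.vb
      Mat      Aloc,Bloc,C;
      PetscInt *garray;
      /* ... create SeqAIJ Aloc (diagonal block) and Bloc (off-diagonal block), fill garray ... */
      MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);
      /* Aloc is now part of C and Bloc has been destroyed; only C may be used from here on */
.ve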
3444 
3445 .seealso: MatCreateMPIAIJWithSplitArrays()
3446 @*/
3447 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3448 {
3449   PetscErrorCode ierr;
3450   Mat_MPIAIJ     *maij;
3451   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3452   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3453   PetscScalar    *oa=b->a;
3454   Mat            Bnew;
3455   PetscInt       m,n,N;
3456 
3457   PetscFunctionBegin;
3458   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3459   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3460   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3461   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3462   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3463   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3464 
3465   /* Get global columns of mat */
3466   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3467 
3468   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3469   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3470   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3471   maij = (Mat_MPIAIJ*)(*mat)->data;
3472 
3473   (*mat)->preallocated = PETSC_TRUE;
3474 
3475   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3476   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3477 
3478   /* Set A as diagonal portion of *mat */
3479   maij->A = A;
3480 
3481   nz = oi[m];
3482   for (i=0; i<nz; i++) {
3483     col   = oj[i];
3484     oj[i] = garray[col];
3485   }
3486 
3487    /* Set Bnew as off-diagonal portion of *mat */
3488   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3489   bnew        = (Mat_SeqAIJ*)Bnew->data;
3490   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3491   maij->B     = Bnew;
3492 
3493   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3494 
3495   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3496   b->free_a       = PETSC_FALSE;
3497   b->free_ij      = PETSC_FALSE;
3498   ierr = MatDestroy(&B);CHKERRQ(ierr);
3499 
3500   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3501   bnew->free_a       = PETSC_TRUE;
3502   bnew->free_ij      = PETSC_TRUE;
3503 
3504   /* condense columns of maij->B */
3505   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3506   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3507   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3508   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3509   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3510   PetscFunctionReturn(0);
3511 }
3512 
3513 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3514 
3515 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3516 {
3517   PetscErrorCode ierr;
3518   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3519   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3520   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3521   Mat            M,Msub,B=a->B;
3522   MatScalar      *aa;
3523   Mat_SeqAIJ     *aij;
3524   PetscInt       *garray = a->garray,*colsub,Ncols;
3525   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3526   IS             iscol_sub,iscmap;
3527   const PetscInt *is_idx,*cmap;
3528   PetscBool      allcolumns=PETSC_FALSE;
3529   MPI_Comm       comm;
3530 
3531   PetscFunctionBegin;
3532   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3533 
3534   if (call == MAT_REUSE_MATRIX) {
3535     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3536     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3537     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3538 
3539     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3540     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3541 
3542     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3543     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3544 
3545     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3546 
3547   } else { /* call == MAT_INITIAL_MATRIX */
3548     PetscBool flg;
3549 
3550     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3551     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3552 
3553     /* (1) iscol -> nonscalable iscol_local */
3554     /* Check for special case: each processor gets entire matrix columns */
3555     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3556     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3557     if (allcolumns) {
3558       iscol_sub = iscol_local;
3559       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3560       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3561 
3562     } else {
3563       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires that iscol_local be sorted; it may have duplicate indices */
3564       PetscInt *idx,*cmap1,k;
3565       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3566       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3567       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3568       count = 0;
3569       k     = 0;
3570       for (i=0; i<Ncols; i++) {
3571         j = is_idx[i];
3572         if (j >= cstart && j < cend) {
3573           /* diagonal part of mat */
3574           idx[count]     = j;
3575           cmap1[count++] = i; /* column index in submat */
3576         } else if (Bn) {
3577           /* off-diagonal part of mat */
3578           if (j == garray[k]) {
3579             idx[count]     = j;
3580             cmap1[count++] = i;  /* column index in submat */
3581           } else if (j > garray[k]) {
3582             while (j > garray[k] && k < Bn-1) k++;
3583             if (j == garray[k]) {
3584               idx[count]     = j;
3585               cmap1[count++] = i; /* column index in submat */
3586             }
3587           }
3588         }
3589       }
3590       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3591 
3592       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3593       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3594       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3595 
3596       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3597     }
3598 
3599     /* (3) Create sequential Msub */
3600     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3601   }
3602 
3603   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3604   aij  = (Mat_SeqAIJ*)(Msub)->data;
3605   ii   = aij->i;
3606   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3607 
3608   /*
3609       m - number of local rows
3610       Ncols - number of columns (same on all processors)
3611       rstart - first row in new global matrix generated
3612   */
3613   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3614 
3615   if (call == MAT_INITIAL_MATRIX) {
3616     /* (4) Create parallel newmat */
3617     PetscMPIInt    rank,size;
3618     PetscInt       csize;
3619 
3620     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3621     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3622 
3623     /*
3624         Determine the number of non-zeros in the diagonal and off-diagonal
3625         portions of the matrix in order to do correct preallocation
3626     */
3627 
3628     /* first get start and end of "diagonal" columns */
3629     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3630     if (csize == PETSC_DECIDE) {
3631       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3632       if (mglobal == Ncols) { /* square matrix */
3633         nlocal = m;
3634       } else {
3635         nlocal = Ncols/size + ((Ncols % size) > rank);
3636       }
3637     } else {
3638       nlocal = csize;
3639     }
3640     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3641     rstart = rend - nlocal;
3642     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3643 
3644     /* next, compute all the lengths */
3645     jj    = aij->j;
3646     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3647     olens = dlens + m;
3648     for (i=0; i<m; i++) {
3649       jend = ii[i+1] - ii[i];
3650       olen = 0;
3651       dlen = 0;
3652       for (j=0; j<jend; j++) {
3653         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3654         else dlen++;
3655         jj++;
3656       }
3657       olens[i] = olen;
3658       dlens[i] = dlen;
3659     }
3660 
3661     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3662     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3663 
3664     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3665     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3666     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3667     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3668     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3669     ierr = PetscFree(dlens);CHKERRQ(ierr);
3670 
3671   } else { /* call == MAT_REUSE_MATRIX */
3672     M    = *newmat;
3673     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3674     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3675     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3676     /*
3677          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3678        rather than the slower MatSetValues().
3679     */
3680     M->was_assembled = PETSC_TRUE;
3681     M->assembled     = PETSC_FALSE;
3682   }
3683 
3684   /* (5) Set values of Msub to *newmat */
3685   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3686   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3687 
3688   jj   = aij->j;
3689   aa   = aij->a;
3690   for (i=0; i<m; i++) {
3691     row = rstart + i;
3692     nz  = ii[i+1] - ii[i];
3693     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3694     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3695     jj += nz; aa += nz;
3696   }
3697   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3698 
3699   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3700   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701 
3702   ierr = PetscFree(colsub);CHKERRQ(ierr);
3703 
3704   /* save Msub, iscol_sub and iscmap used in processor for next request */
3705   if (call ==  MAT_INITIAL_MATRIX) {
3706     *newmat = M;
3707     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3708     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3709 
3710     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3711     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3712 
3713     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3714     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3715 
3716     if (iscol_local) {
3717       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3718       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3719     }
3720   }
3721   PetscFunctionReturn(0);
3722 }
3723 
3724 /*
3725     Not great since it makes two copies of the submatrix: first a SeqAIJ
3726   on each process, and then the final result is formed by concatenating the local matrices.
3727   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3728 
3729   Note: This requires a sequential iscol with all indices.
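
  A caller holding only a parallel iscol can build such a sequential index set with, for example (a sketch):
      ISAllGather(iscol,&iscol_seq);
  which is essentially what MatCreateSubMatrix_MPIAIJ() does through ISGetSeqIS_Private() before calling this routine.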
3730 */
3731 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3732 {
3733   PetscErrorCode ierr;
3734   PetscMPIInt    rank,size;
3735   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3736   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3737   Mat            M,Mreuse;
3738   MatScalar      *aa,*vwork;
3739   MPI_Comm       comm;
3740   Mat_SeqAIJ     *aij;
3741   PetscBool      colflag,allcolumns=PETSC_FALSE;
3742 
3743   PetscFunctionBegin;
3744   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3745   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3747 
3748   /* Check for special case: each processor gets entire matrix columns */
3749   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3750   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3751   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3752 
3753   if (call ==  MAT_REUSE_MATRIX) {
3754     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3755     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3756     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3757   } else {
3758     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3759   }
3760 
3761   /*
3762       m - number of local rows
3763       n - number of columns (same on all processors)
3764       rstart - first row in new global matrix generated
3765   */
3766   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3767   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3768   if (call == MAT_INITIAL_MATRIX) {
3769     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3770     ii  = aij->i;
3771     jj  = aij->j;
3772 
3773     /*
3774         Determine the number of non-zeros in the diagonal and off-diagonal
3775         portions of the matrix in order to do correct preallocation
3776     */
3777 
3778     /* first get start and end of "diagonal" columns */
3779     if (csize == PETSC_DECIDE) {
3780       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3781       if (mglobal == n) { /* square matrix */
3782         nlocal = m;
3783       } else {
3784         nlocal = n/size + ((n % size) > rank);
3785       }
3786     } else {
3787       nlocal = csize;
3788     }
3789     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3790     rstart = rend - nlocal;
3791     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3792 
3793     /* next, compute all the lengths */
3794     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3795     olens = dlens + m;
3796     for (i=0; i<m; i++) {
3797       jend = ii[i+1] - ii[i];
3798       olen = 0;
3799       dlen = 0;
3800       for (j=0; j<jend; j++) {
3801         if (*jj < rstart || *jj >= rend) olen++;
3802         else dlen++;
3803         jj++;
3804       }
3805       olens[i] = olen;
3806       dlens[i] = dlen;
3807     }
3808     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3809     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3810     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3811     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3812     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3813     ierr = PetscFree(dlens);CHKERRQ(ierr);
3814   } else {
3815     PetscInt ml,nl;
3816 
3817     M    = *newmat;
3818     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3819     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3820     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3821     /*
3822          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3823        rather than the slower MatSetValues().
3824     */
3825     M->was_assembled = PETSC_TRUE;
3826     M->assembled     = PETSC_FALSE;
3827   }
3828   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3829   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3830   ii   = aij->i;
3831   jj   = aij->j;
3832   aa   = aij->a;
3833   for (i=0; i<m; i++) {
3834     row   = rstart + i;
3835     nz    = ii[i+1] - ii[i];
3836     cwork = jj;     jj += nz;
3837     vwork = aa;     aa += nz;
3838     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3839   }
3840 
3841   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3842   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   *newmat = M;
3844 
3845   /* save submatrix used in processor for next request */
3846   if (call ==  MAT_INITIAL_MATRIX) {
3847     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3848     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3849   }
3850   PetscFunctionReturn(0);
3851 }
3852 
3853 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3854 {
3855   PetscInt       m,cstart, cend,j,nnz,i,d;
3856   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3857   const PetscInt *JJ;
3858   PetscScalar    *values;
3859   PetscErrorCode ierr;
3860   PetscBool      nooffprocentries;
3861 
3862   PetscFunctionBegin;
3863   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3864 
3865   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3866   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3867   m      = B->rmap->n;
3868   cstart = B->cmap->rstart;
3869   cend   = B->cmap->rend;
3870   rstart = B->rmap->rstart;
3871 
3872   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3873 
3874 #if defined(PETSC_USE_DEBUG)
3875   for (i=0; i<m; i++) {
3876     nnz = Ii[i+1]- Ii[i];
3877     JJ  = J + Ii[i];
3878     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3879     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3880     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3881   }
3882 #endif
3883 
3884   for (i=0; i<m; i++) {
3885     nnz     = Ii[i+1]- Ii[i];
3886     JJ      = J + Ii[i];
3887     nnz_max = PetscMax(nnz_max,nnz);
3888     d       = 0;
3889     for (j=0; j<nnz; j++) {
3890       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3891     }
3892     d_nnz[i] = d;
3893     o_nnz[i] = nnz - d;
3894   }
3895   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3896   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3897 
3898   if (v) values = (PetscScalar*)v;
3899   else {
3900     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3901   }
3902 
3903   for (i=0; i<m; i++) {
3904     ii   = i + rstart;
3905     nnz  = Ii[i+1]- Ii[i];
3906     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3907   }
3908   nooffprocentries    = B->nooffprocentries;
3909   B->nooffprocentries = PETSC_TRUE;
3910   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3911   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3912   B->nooffprocentries = nooffprocentries;
3913 
3914   if (!v) {
3915     ierr = PetscFree(values);CHKERRQ(ierr);
3916   }
3917   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3918   PetscFunctionReturn(0);
3919 }
3920 
3921 /*@
3922    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3923    (the default parallel PETSc format).
3924 
3925    Collective on MPI_Comm
3926 
3927    Input Parameters:
3928 +  B - the matrix
3929 .  i - the indices into j for the start of each local row (starts with zero)
3930 .  j - the column indices for each local row (starts with zero)
3931 -  v - optional values in the matrix
3932 
3933    Level: developer
3934 
3935    Notes:
3936        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3937      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3938      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3939 
3940        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3941 
3942        The format used for the sparse matrix input is equivalent to a
3943     row-major ordering, i.e. for the following matrix, the input data expected is
3944     as shown
3945 
3946 $        1 0 0
3947 $        2 0 3     P0
3948 $       -------
3949 $        4 5 6     P1
3950 $
3951 $     Process0 [P0]: rows_owned=[0,1]
3952 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3953 $        j =  {0,0,2}  [size = 3]
3954 $        v =  {1,2,3}  [size = 3]
3955 $
3956 $     Process1 [P1]: rows_owned=[2]
3957 $        i =  {0,3}    [size = nrow+1  = 1+1]
3958 $        j =  {0,1,2}  [size = 3]
3959 $        v =  {4,5,6}  [size = 3]
3960 
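       For instance, a minimal sketch of the calls process P0 above might make (the
     variable names pi, pj, pv, and B are illustrative only; error checking is omitted):

$     PetscInt    pi[] = {0,1,3}, pj[] = {0,0,2};
$     PetscScalar pv[] = {1.0,2.0,3.0};
$     MatCreate(comm,&B);
$     MatSetSizes(B,2,PETSC_DECIDE,3,3);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,pi,pj,pv);
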
3961 .keywords: matrix, aij, compressed row, sparse, parallel
3962 
3963 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3964           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3965 @*/
3966 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3967 {
3968   PetscErrorCode ierr;
3969 
3970   PetscFunctionBegin;
3971   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3972   PetscFunctionReturn(0);
3973 }
3974 
3975 /*@C
3976    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3977    (the default parallel PETSc format).  For good matrix assembly performance
3978    the user should preallocate the matrix storage by setting the parameters
3979    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3980    performance can be increased by more than a factor of 50.
3981 
3982    Collective on MPI_Comm
3983 
3984    Input Parameters:
3985 +  B - the matrix
3986 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3987            (same value is used for all local rows)
3988 .  d_nnz - array containing the number of nonzeros in the various rows of the
3989            DIAGONAL portion of the local submatrix (possibly different for each row)
3990            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3991            The size of this array is equal to the number of local rows, i.e 'm'.
3992            For matrices that will be factored, you must leave room for (and set)
3993            the diagonal entry even if it is zero.
3994 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3995            submatrix (same value is used for all local rows).
3996 -  o_nnz - array containing the number of nonzeros in the various rows of the
3997            OFF-DIAGONAL portion of the local submatrix (possibly different for
3998            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3999            structure. The size of this array is equal to the number
4000            of local rows, i.e 'm'.
4001 
4002    If the *_nnz parameter is given then the *_nz parameter is ignored
4003 
4004    The AIJ format (also called the Yale sparse matrix format or
4005    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4006    storage.  The stored row and column indices begin with zero.
4007    See Users-Manual: ch_mat for details.
4008 
4009    The parallel matrix is partitioned such that the first m0 rows belong to
4010    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4011    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4012 
4013    The DIAGONAL portion of the local submatrix of a processor can be defined
4014    as the submatrix which is obtained by extracting the part corresponding to
4015    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4016    first row that belongs to the processor, r2 is the last row belonging to
4017    this processor, and c1-c2 is the range of indices of the local part of a
4018    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4019    common case of a square matrix, the row and column ranges are the same and
4020    the DIAGONAL part is also square. The remaining portion of the local
4021    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4022 
4023    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4024 
4025    You can call MatGetInfo() to get information on how effective the preallocation was;
4026    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4027    You can also run with the option -info and look for messages with the string
4028    malloc in them to see if additional memory allocation was needed.
4029 
4030    Example usage:
4031 
4032    Consider the following 8x8 matrix with 34 non-zero values, that is
4033    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4034    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4035    as follows:
4036 
4037 .vb
4038             1  2  0  |  0  3  0  |  0  4
4039     Proc0   0  5  6  |  7  0  0  |  8  0
4040             9  0 10  | 11  0  0  | 12  0
4041     -------------------------------------
4042            13  0 14  | 15 16 17  |  0  0
4043     Proc1   0 18  0  | 19 20 21  |  0  0
4044             0  0  0  | 22 23  0  | 24  0
4045     -------------------------------------
4046     Proc2  25 26 27  |  0  0 28  | 29  0
4047            30  0  0  | 31 32 33  |  0 34
4048 .ve
4049 
4050    This can be represented as a collection of submatrices as:
4051 
4052 .vb
4053       A B C
4054       D E F
4055       G H I
4056 .ve
4057 
4058    Where the submatrices A,B,C are owned by proc0, D,E,F are
4059    owned by proc1, G,H,I are owned by proc2.
4060 
4061    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4062    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4063    The 'M','N' parameters are 8,8, and have the same values on all procs.
4064 
4065    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4066    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4067    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4068    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4069    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4070    matrix, and [DF] as another SeqAIJ matrix.
4071 
4072    When d_nz, o_nz parameters are specified, d_nz storage elements are
4073    allocated for every row of the local diagonal submatrix, and o_nz
4074    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4075    One way to choose d_nz and o_nz is to use the max nonzeros per local
4076    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4077    In this case, the values of d_nz,o_nz are:
4078 .vb
4079      proc0 : dnz = 2, o_nz = 2
4080      proc1 : dnz = 3, o_nz = 2
4081      proc2 : dnz = 1, o_nz = 4
4082 .ve
4083    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4084    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4085    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4086    34 values.
4087 
4088    When d_nnz, o_nnz parameters are specified, the storage is specified
4089    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4090    In the above case the values for d_nnz,o_nnz are:
4091 .vb
4092      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4093      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4094      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4095 .ve
4096    Here the space allocated is the sum of all the above values, i.e. 34, and
4097    hence pre-allocation is perfect.
4098 
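   For instance, a minimal sketch of the calls proc0 might make with the d_nnz/o_nnz
   values above (the names B and comm are illustrative only; error checking is omitted):

$     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
$     MatCreate(comm,&B);
$     MatSetSizes(B,3,3,8,8);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
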
4099    Level: intermediate
4100 
4101 .keywords: matrix, aij, compressed row, sparse, parallel
4102 
4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4104           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4105 @*/
4106 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4107 {
4108   PetscErrorCode ierr;
4109 
4110   PetscFunctionBegin;
4111   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4112   PetscValidType(B,1);
4113   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4114   PetscFunctionReturn(0);
4115 }
4116 
4117 /*@
4118      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4119          CSR format the local rows.
4120 
4121    Collective on MPI_Comm
4122 
4123    Input Parameters:
4124 +  comm - MPI communicator
4125 .  m - number of local rows (Cannot be PETSC_DECIDE)
4126 .  n - This value should be the same as the local size used in creating the
4127        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4128        calculated if N is given). For square matrices n is almost always m.
4129 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4130 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4131 .   i - row indices
4132 .   j - column indices
4133 -   a - matrix values
4134 
4135    Output Parameter:
4136 .   mat - the matrix
4137 
4138    Level: intermediate
4139 
4140    Notes:
4141        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4142      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4143      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4144 
4145        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4146 
4147        The format used for the sparse matrix input is equivalent to a
4148     row-major ordering, i.e. for the following matrix, the input data expected is
4149     as shown
4150 
4151 $        1 0 0
4152 $        2 0 3     P0
4153 $       -------
4154 $        4 5 6     P1
4155 $
4156 $     Process0 [P0]: rows_owned=[0,1]
4157 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4158 $        j =  {0,0,2}  [size = 3]
4159 $        v =  {1,2,3}  [size = 3]
4160 $
4161 $     Process1 [P1]: rows_owned=[2]
4162 $        i =  {0,3}    [size = nrow+1  = 1+1]
4163 $        j =  {0,1,2}  [size = 3]
4164 $        v =  {4,5,6}  [size = 3]
4165 
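       For instance, a minimal sketch of the single call process P0 above might make (the
     variable names pi, pj, pv, and A are illustrative only; error checking is omitted):

$     PetscInt    pi[] = {0,1,3}, pj[] = {0,0,2};
$     PetscScalar pv[] = {1.0,2.0,3.0};
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,pi,pj,pv,&A);
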
4166 .keywords: matrix, aij, compressed row, sparse, parallel
4167 
4168 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4169           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4170 @*/
4171 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4172 {
4173   PetscErrorCode ierr;
4174 
4175   PetscFunctionBegin;
4176   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4177   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4178   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4179   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4180   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4181   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4182   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4183   PetscFunctionReturn(0);
4184 }
4185 
4186 /*@C
4187    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4188    (the default parallel PETSc format).  For good matrix assembly performance
4189    the user should preallocate the matrix storage by setting the parameters
4190    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4191    performance can be increased by more than a factor of 50.
4192 
4193    Collective on MPI_Comm
4194 
4195    Input Parameters:
4196 +  comm - MPI communicator
4197 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4198            This value should be the same as the local size used in creating the
4199            y vector for the matrix-vector product y = Ax.
4200 .  n - This value should be the same as the local size used in creating the
4201        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4202        calculated if N is given). For square matrices n is almost always m.
4203 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4204 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4205 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4206            (same value is used for all local rows)
4207 .  d_nnz - array containing the number of nonzeros in the various rows of the
4208            DIAGONAL portion of the local submatrix (possibly different for each row)
4209            or NULL, if d_nz is used to specify the nonzero structure.
4210            The size of this array is equal to the number of local rows, i.e 'm'.
4211 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4212            submatrix (same value is used for all local rows).
4213 -  o_nnz - array containing the number of nonzeros in the various rows of the
4214            OFF-DIAGONAL portion of the local submatrix (possibly different for
4215            each row) or NULL, if o_nz is used to specify the nonzero
4216            structure. The size of this array is equal to the number
4217            of local rows, i.e 'm'.
4218 
4219    Output Parameter:
4220 .  A - the matrix
4221 
4222    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4223    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4224    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4225 
4226    Notes:
4227    If the *_nnz parameter is given then the *_nz parameter is ignored
4228 
4229    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4230    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4231    storage requirements for this matrix.
4232 
4233    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4234    processor then it must be used on all processors that share the object for
4235    that argument.
4236 
4237    The user MUST specify either the local or global matrix dimensions
4238    (possibly both).
4239 
4240    The parallel matrix is partitioned across processors such that the
4241    first m0 rows belong to process 0, the next m1 rows belong to
4242    process 1, the next m2 rows belong to process 2 etc.. where
4243    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4244    values corresponding to [m x N] submatrix.
4245 
4246    The columns are logically partitioned with the n0 columns belonging
4247    to 0th partition, the next n1 columns belonging to the next
4248    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4249 
4250    The DIAGONAL portion of the local submatrix on any given processor
4251    is the submatrix corresponding to the rows and columns m,n
4252    corresponding to the given processor, i.e. the diagonal matrix on
4253    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4254    etc. The remaining portion of the local submatrix [m x (N-n)]
4255    constitutes the OFF-DIAGONAL portion. The example below better
4256    illustrates this concept.
4257 
4258    For a square global matrix we define each processor's diagonal portion
4259    to be its local rows and the corresponding columns (a square submatrix);
4260    each processor's off-diagonal portion encompasses the remainder of the
4261    local matrix (a rectangular submatrix).
4262 
4263    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4264 
4265    When calling this routine with a single process communicator, a matrix of
4266    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4267    type of communicator, use the construction mechanism
4268 .vb
4269      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4270 .ve
4271 
4277    By default, this format uses inodes (identical nodes) when possible.
4278    We search for consecutive rows with the same nonzero structure, thereby
4279    reusing matrix information to achieve increased efficiency.
4280 
4281    Options Database Keys:
4282 +  -mat_no_inode  - Do not use inodes
4283 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4284 
4285 
4286 
4287    Example usage:
4288 
4289    Consider the following 8x8 matrix with 34 non-zero values, that is
4290    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4291    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4292    as follows
4293 
4294 .vb
4295             1  2  0  |  0  3  0  |  0  4
4296     Proc0   0  5  6  |  7  0  0  |  8  0
4297             9  0 10  | 11  0  0  | 12  0
4298     -------------------------------------
4299            13  0 14  | 15 16 17  |  0  0
4300     Proc1   0 18  0  | 19 20 21  |  0  0
4301             0  0  0  | 22 23  0  | 24  0
4302     -------------------------------------
4303     Proc2  25 26 27  |  0  0 28  | 29  0
4304            30  0  0  | 31 32 33  |  0 34
4305 .ve
4306 
4307    This can be represented as a collection of submatrices as
4308 
4309 .vb
4310       A B C
4311       D E F
4312       G H I
4313 .ve
4314 
4315    Where the submatrices A,B,C are owned by proc0, D,E,F are
4316    owned by proc1, G,H,I are owned by proc2.
4317 
4318    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4319    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4320    The 'M','N' parameters are 8,8, and have the same values on all procs.
4321 
4322    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4323    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4324    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4325    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4326    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4327    matrix, and [DF] as another SeqAIJ matrix.
4328 
4329    When d_nz, o_nz parameters are specified, d_nz storage elements are
4330    allocated for every row of the local diagonal submatrix, and o_nz
4331    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4332    One way to choose d_nz and o_nz is to use the max nonzeros per local
4333    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4334    In this case, the values of d_nz,o_nz are
4335 .vb
4336      proc0 : dnz = 2, o_nz = 2
4337      proc1 : dnz = 3, o_nz = 2
4338      proc2 : dnz = 1, o_nz = 4
4339 .ve
4340    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4341    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4342    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4343    34 values.
4344 
4345    When d_nnz, o_nnz parameters are specified, the storage is specified
4346    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4347    In the above case the values for d_nnz,o_nnz are
4348 .vb
4349      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4350      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4351      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4352 .ve
4353    Here the space allocated is the sum of all the above values, i.e. 34, and
4354    hence pre-allocation is perfect.
4355 
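   For instance, a minimal sketch of the call proc0 might make (the name A is illustrative
   only; error checking is omitted), using the per-row counts

$     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
$     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);

   or the simpler single-value form d_nz=2, o_nz=2,

$     MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);
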
4356    Level: intermediate
4357 
4358 .keywords: matrix, aij, compressed row, sparse, parallel
4359 
4360 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4361           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4362 @*/
4363 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4364 {
4365   PetscErrorCode ierr;
4366   PetscMPIInt    size;
4367 
4368   PetscFunctionBegin;
4369   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4370   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4371   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4372   if (size > 1) {
4373     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4374     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4375   } else {
4376     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4377     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4378   }
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4383 {
4384   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4385   PetscBool      flg;
4386   PetscErrorCode ierr;
4387 
4388   PetscFunctionBegin;
4389   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4390   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4391   if (Ad)     *Ad     = a->A;
4392   if (Ao)     *Ao     = a->B;
4393   if (colmap) *colmap = a->garray;
4394   PetscFunctionReturn(0);
4395 }
4396 
4397 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4398 {
4399   PetscErrorCode ierr;
4400   PetscInt       m,N,i,rstart,nnz,Ii;
4401   PetscInt       *indx;
4402   PetscScalar    *values;
4403 
4404   PetscFunctionBegin;
4405   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4406   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4407     PetscInt       *dnz,*onz,sum,bs,cbs;
4408 
4409     if (n == PETSC_DECIDE) {
4410       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4411     }
4412     /* Check sum(n) = N */
4413     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4414     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4415 
4416     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4417     rstart -= m;
4418 
4419     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4420     for (i=0; i<m; i++) {
4421       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4422       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4423       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4424     }
4425 
4426     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4427     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4428     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4429     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4430     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4431     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4432     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4433     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4434   }
4435 
4436   /* numeric phase */
4437   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4438   for (i=0; i<m; i++) {
4439     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4440     Ii   = i + rstart;
4441     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4442     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4443   }
4444   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4445   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4450 {
4451   PetscErrorCode    ierr;
4452   PetscMPIInt       rank;
4453   PetscInt          m,N,i,rstart,nnz;
4454   size_t            len;
4455   const PetscInt    *indx;
4456   PetscViewer       out;
4457   char              *name;
4458   Mat               B;
4459   const PetscScalar *values;
4460 
4461   PetscFunctionBegin;
4462   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4463   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4464   /* Should this be the type of the diagonal block of A? */
4465   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4466   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4467   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4468   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4469   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4470   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4471   for (i=0; i<m; i++) {
4472     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4473     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4474     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4475   }
4476   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4477   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4478 
4479   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4480   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4481   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4482   sprintf(name,"%s.%d",outfile,rank);
4483   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4484   ierr = PetscFree(name);CHKERRQ(ierr);
4485   ierr = MatView(B,out);CHKERRQ(ierr);
4486   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4487   ierr = MatDestroy(&B);CHKERRQ(ierr);
4488   PetscFunctionReturn(0);
4489 }
4490 
4491 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4492 {
4493   PetscErrorCode      ierr;
4494   Mat_Merge_SeqsToMPI *merge;
4495   PetscContainer      container;
4496 
4497   PetscFunctionBegin;
4498   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4499   if (container) {
4500     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4507     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4508     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4510     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4511     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4513     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4514     ierr = PetscFree(merge);CHKERRQ(ierr);
4515     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4516   }
4517   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 #include <../src/mat/utils/freespace.h>
4522 #include <petscbt.h>
4523 
4524 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4525 {
4526   PetscErrorCode      ierr;
4527   MPI_Comm            comm;
4528   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4529   PetscMPIInt         size,rank,taga,*len_s;
4530   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4531   PetscInt            proc,m;
4532   PetscInt            **buf_ri,**buf_rj;
4533   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4534   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4535   MPI_Request         *s_waits,*r_waits;
4536   MPI_Status          *status;
4537   MatScalar           *aa=a->a;
4538   MatScalar           **abuf_r,*ba_i;
4539   Mat_Merge_SeqsToMPI *merge;
4540   PetscContainer      container;
4541 
4542   PetscFunctionBegin;
4543   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4544   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4545 
4546   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4547   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4548 
4549   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4550   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4551 
4552   bi     = merge->bi;
4553   bj     = merge->bj;
4554   buf_ri = merge->buf_ri;
4555   buf_rj = merge->buf_rj;
4556 
4557   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4558   owners = merge->rowmap->range;
4559   len_s  = merge->len_s;
4560 
4561   /* send and recv matrix values */
4562   /*-----------------------------*/
4563   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4564   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4565 
4566   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4567   for (proc=0,k=0; proc<size; proc++) {
4568     if (!len_s[proc]) continue;
4569     i    = owners[proc];
4570     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4571     k++;
4572   }
4573 
4574   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4575   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4576   ierr = PetscFree(status);CHKERRQ(ierr);
4577 
4578   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4579   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4580 
4581   /* insert mat values of mpimat */
4582   /*----------------------------*/
4583   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4584   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4585 
4586   for (k=0; k<merge->nrecv; k++) {
4587     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4588     nrows       = *(buf_ri_k[k]);
4589     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4590     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4591   }
4592 
4593   /* set values of ba */
4594   m = merge->rowmap->n;
4595   for (i=0; i<m; i++) {
4596     arow = owners[rank] + i;
4597     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4598     bnzi = bi[i+1] - bi[i];
4599     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4600 
4601     /* add local non-zero vals of this proc's seqmat into ba */
4602     anzi   = ai[arow+1] - ai[arow];
4603     aj     = a->j + ai[arow];
4604     aa     = a->a + ai[arow];
4605     nextaj = 0;
4606     for (j=0; nextaj<anzi; j++) {
4607       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4608         ba_i[j] += aa[nextaj++];
4609       }
4610     }
4611 
4612     /* add received vals into ba */
4613     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4614       /* i-th row */
4615       if (i == *nextrow[k]) {
4616         anzi   = *(nextai[k]+1) - *nextai[k];
4617         aj     = buf_rj[k] + *(nextai[k]);
4618         aa     = abuf_r[k] + *(nextai[k]);
4619         nextaj = 0;
4620         for (j=0; nextaj<anzi; j++) {
4621           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4622             ba_i[j] += aa[nextaj++];
4623           }
4624         }
4625         nextrow[k]++; nextai[k]++;
4626       }
4627     }
4628     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4629   }
4630   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4631   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4632 
4633   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4634   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4635   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4636   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4637   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4638   PetscFunctionReturn(0);
4639 }
4640 
4641 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4642 {
4643   PetscErrorCode      ierr;
4644   Mat                 B_mpi;
4645   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4646   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4647   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4648   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4649   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4650   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4651   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4652   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4653   MPI_Status          *status;
4654   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4655   PetscBT             lnkbt;
4656   Mat_Merge_SeqsToMPI *merge;
4657   PetscContainer      container;
4658 
4659   PetscFunctionBegin;
4660   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4661 
4662   /* make sure it is a PETSc comm */
4663   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4664   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4665   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4666 
4667   ierr = PetscNew(&merge);CHKERRQ(ierr);
4668   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4669 
4670   /* determine row ownership */
4671   /*---------------------------------------------------------*/
4672   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4673   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4674   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4675   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4676   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4677   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4678   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4679 
4680   m      = merge->rowmap->n;
4681   owners = merge->rowmap->range;
4682 
4683   /* determine the number of messages to send, their lengths */
4684   /*---------------------------------------------------------*/
4685   len_s = merge->len_s;
4686 
4687   len          = 0; /* length of buf_si[] */
4688   merge->nsend = 0;
4689   for (proc=0; proc<size; proc++) {
4690     len_si[proc] = 0;
4691     if (proc == rank) {
4692       len_s[proc] = 0;
4693     } else {
4694       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4695       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4696     }
4697     if (len_s[proc]) {
4698       merge->nsend++;
4699       nrows = 0;
4700       for (i=owners[proc]; i<owners[proc+1]; i++) {
4701         if (ai[i+1] > ai[i]) nrows++;
4702       }
4703       len_si[proc] = 2*(nrows+1);
4704       len         += len_si[proc];
4705     }
4706   }
4707 
4708   /* determine the number and length of messages to receive for ij-structure */
4709   /*-------------------------------------------------------------------------*/
4710   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4711   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4712 
4713   /* post the Irecv of j-structure */
4714   /*-------------------------------*/
4715   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4716   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4717 
4718   /* post the Isend of j-structure */
4719   /*--------------------------------*/
4720   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4721 
4722   for (proc=0, k=0; proc<size; proc++) {
4723     if (!len_s[proc]) continue;
4724     i    = owners[proc];
4725     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4726     k++;
4727   }
4728 
4729   /* receives and sends of j-structure are complete */
4730   /*------------------------------------------------*/
4731   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4732   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4733 
4734   /* send and recv i-structure */
4735   /*---------------------------*/
4736   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4737   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4738 
4739   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4740   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4741   for (proc=0,k=0; proc<size; proc++) {
4742     if (!len_s[proc]) continue;
4743     /* form outgoing message for i-structure:
4744          buf_si[0]:                 nrows to be sent
4745                [1:nrows]:           row index (global)
4746                [nrows+1:2*nrows+1]: i-structure index
4747     */
4748     /*-------------------------------------------*/
4749     nrows       = len_si[proc]/2 - 1;
4750     buf_si_i    = buf_si + nrows+1;
4751     buf_si[0]   = nrows;
4752     buf_si_i[0] = 0;
4753     nrows       = 0;
4754     for (i=owners[proc]; i<owners[proc+1]; i++) {
4755       anzi = ai[i+1] - ai[i];
4756       if (anzi) {
4757         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4758         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4759         nrows++;
4760       }
4761     }
4762     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4763     k++;
4764     buf_si += len_si[proc];
4765   }
4766 
4767   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4768   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4769 
4770   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4771   for (i=0; i<merge->nrecv; i++) {
4772     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4773   }
4774 
4775   ierr = PetscFree(len_si);CHKERRQ(ierr);
4776   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4777   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4778   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4779   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4780   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4781   ierr = PetscFree(status);CHKERRQ(ierr);
4782 
4783   /* compute a local seq matrix in each processor */
4784   /*----------------------------------------------*/
4785   /* allocate bi array and free space for accumulating nonzero column info */
4786   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4787   bi[0] = 0;
4788 
4789   /* create and initialize a linked list */
4790   nlnk = N+1;
4791   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4792 
4793   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4794   len  = ai[owners[rank+1]] - ai[owners[rank]];
4795   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4796 
4797   current_space = free_space;
4798 
4799   /* determine symbolic info for each local row */
4800   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4801 
4802   for (k=0; k<merge->nrecv; k++) {
4803     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4804     nrows       = *buf_ri_k[k];
4805     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4806     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4807   }
4808 
4809   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4810   len  = 0;
4811   for (i=0; i<m; i++) {
4812     bnzi = 0;
4813     /* add local non-zero cols of this proc's seqmat into lnk */
4814     arow  = owners[rank] + i;
4815     anzi  = ai[arow+1] - ai[arow];
4816     aj    = a->j + ai[arow];
4817     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818     bnzi += nlnk;
4819     /* add received col data into lnk */
4820     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4821       if (i == *nextrow[k]) { /* i-th row */
4822         anzi  = *(nextai[k]+1) - *nextai[k];
4823         aj    = buf_rj[k] + *nextai[k];
4824         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4825         bnzi += nlnk;
4826         nextrow[k]++; nextai[k]++;
4827       }
4828     }
4829     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4830 
4831     /* if free space is not available, make more free space */
4832     if (current_space->local_remaining<bnzi) {
4833       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4834       nspacedouble++;
4835     }
4836     /* copy data into free space, then initialize lnk */
4837     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4838     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4839 
4840     current_space->array           += bnzi;
4841     current_space->local_used      += bnzi;
4842     current_space->local_remaining -= bnzi;
4843 
4844     bi[i+1] = bi[i] + bnzi;
4845   }
4846 
4847   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4848 
4849   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4850   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4851   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4852 
4853   /* create symbolic parallel matrix B_mpi */
4854   /*---------------------------------------*/
4855   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4856   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4857   if (n==PETSC_DECIDE) {
4858     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4859   } else {
4860     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4861   }
4862   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4863   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4864   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4865   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4866   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4867 
4868   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4869   B_mpi->assembled    = PETSC_FALSE;
4870   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4871   merge->bi           = bi;
4872   merge->bj           = bj;
4873   merge->buf_ri       = buf_ri;
4874   merge->buf_rj       = buf_rj;
4875   merge->coi          = NULL;
4876   merge->coj          = NULL;
4877   merge->owners_co    = NULL;
4878 
4879   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4880 
4881   /* attach the supporting struct to B_mpi for reuse */
4882   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4883   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4884   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4885   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4886   *mpimat = B_mpi;
4887 
4888   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
4891 
4892 /*@C
4893       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4894                  matrices from each processor
4895 
4896     Collective on MPI_Comm
4897 
4898    Input Parameters:
4899 +    comm - the communicator the parallel matrix will live on
4900 .    seqmat - the input sequential matrix on each process
4901 .    m - number of local rows (or PETSC_DECIDE)
4902 .    n - number of local columns (or PETSC_DECIDE)
4903 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4904 
4905    Output Parameter:
4906 .    mpimat - the parallel matrix generated
4907 
4908     Level: advanced
4909 
4910    Notes:
4911      The dimensions of the sequential matrix in each processor MUST be the same.
4912      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4913      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
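
     A minimal usage sketch (error checking omitted; seqmat is assumed to be an assembled
     SeqAIJ matrix on each process):

$     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
$     /* ... later, change the numerical values of seqmat without changing its nonzero pattern ... */
$     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);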
4914 @*/
4915 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4916 {
4917   PetscErrorCode ierr;
4918   PetscMPIInt    size;
4919 
4920   PetscFunctionBegin;
4921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4922   if (size == 1) {
4923     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4924     if (scall == MAT_INITIAL_MATRIX) {
4925       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4926     } else {
4927       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4928     }
4929     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930     PetscFunctionReturn(0);
4931   }
4932   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4933   if (scall == MAT_INITIAL_MATRIX) {
4934     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4935   }
4936   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4937   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 /*@
4942      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4943           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4944           with MatGetSize()
4945 
4946     Not Collective
4947 
4948    Input Parameters:
4949 +    A - the matrix
4950 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4951 
4952    Output Parameter:
4953 .    A_loc - the local sequential matrix generated
4954 
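   Notes:
     A minimal usage sketch (error checking omitted):

$     Mat A_loc;
$     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
$     /* ... use A_loc; a later call with MAT_REUSE_MATRIX refreshes its values ... */
$     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
$     MatDestroy(&A_loc);
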
4955     Level: developer
4956 
4957 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4958 
4959 @*/
4960 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4961 {
4962   PetscErrorCode ierr;
4963   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4964   Mat_SeqAIJ     *mat,*a,*b;
4965   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4966   MatScalar      *aa,*ba,*cam;
4967   PetscScalar    *ca;
4968   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4969   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4970   PetscBool      match;
4971   MPI_Comm       comm;
4972   PetscMPIInt    size;
4973 
4974   PetscFunctionBegin;
4975   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4976   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4977   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4978   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4979   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4980 
4981   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4982   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4983   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4984   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4985   aa = a->a; ba = b->a;
4986   if (scall == MAT_INITIAL_MATRIX) {
4987     if (size == 1) {
4988       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4989       PetscFunctionReturn(0);
4990     }
4991 
4992     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4993     ci[0] = 0;
4994     for (i=0; i<am; i++) {
4995       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4996     }
4997     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4998     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4999     k    = 0;
5000     for (i=0; i<am; i++) {
5001       ncols_o = bi[i+1] - bi[i];
5002       ncols_d = ai[i+1] - ai[i];
5003       /* off-diagonal portion of A */
5004       for (jo=0; jo<ncols_o; jo++) {
5005         col = cmap[*bj];
5006         if (col >= cstart) break;
5007         cj[k]   = col; bj++;
5008         ca[k++] = *ba++;
5009       }
5010       /* diagonal portion of A */
5011       for (j=0; j<ncols_d; j++) {
5012         cj[k]   = cstart + *aj++;
5013         ca[k++] = *aa++;
5014       }
5015       /* off-diagonal portion of A */
5016       for (j=jo; j<ncols_o; j++) {
5017         cj[k]   = cmap[*bj++];
5018         ca[k++] = *ba++;
5019       }
5020     }
5021     /* put together the new matrix */
5022     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5023     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5024     /* Since these are PETSc arrays, change flags to free them as necessary. */
5025     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5026     mat->free_a  = PETSC_TRUE;
5027     mat->free_ij = PETSC_TRUE;
5028     mat->nonew   = 0;
5029   } else if (scall == MAT_REUSE_MATRIX) {
5030     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5031     ci = mat->i; cj = mat->j; cam = mat->a;
5032     for (i=0; i<am; i++) {
5033       /* off-diagonal portion of A */
5034       ncols_o = bi[i+1] - bi[i];
5035       for (jo=0; jo<ncols_o; jo++) {
5036         col = cmap[*bj];
5037         if (col >= cstart) break;
5038         *cam++ = *ba++; bj++;
5039       }
5040       /* diagonal portion of A */
5041       ncols_d = ai[i+1] - ai[i];
5042       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5043       /* off-diagonal portion of A */
5044       for (j=jo; j<ncols_o; j++) {
5045         *cam++ = *ba++; bj++;
5046       }
5047     }
5048   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5049   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5050   PetscFunctionReturn(0);
5051 }
5052 
5053 /*@C
5054      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5055 
5056     Not Collective
5057 
5058    Input Parameters:
5059 +    A - the matrix
5060 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5061 -    row, col - index sets of rows and columns to extract (or NULL)
5062 
5063    Output Parameter:
5064 .    A_loc - the local sequential matrix generated
5065 
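   Notes:
     A minimal usage sketch (error checking omitted) that extracts all local rows and all
     locally nonzero columns:

$     Mat A_loc;
$     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
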
5066     Level: developer
5067 
5068 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5069 
5070 @*/
5071 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5072 {
5073   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5074   PetscErrorCode ierr;
5075   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5076   IS             isrowa,iscola;
5077   Mat            *aloc;
5078   PetscBool      match;
5079 
5080   PetscFunctionBegin;
5081   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5082   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5083   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5084   if (!row) {
5085     start = A->rmap->rstart; end = A->rmap->rend;
5086     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5087   } else {
5088     isrowa = *row;
5089   }
5090   if (!col) {
5091     start = A->cmap->rstart;
5092     cmap  = a->garray;
5093     nzA   = a->A->cmap->n;
5094     nzB   = a->B->cmap->n;
5095     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5096     ncols = 0;
5097     for (i=0; i<nzB; i++) {
5098       if (cmap[i] < start) idx[ncols++] = cmap[i];
5099       else break;
5100     }
5101     imark = i;
5102     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5103     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5104     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5105   } else {
5106     iscola = *col;
5107   }
5108   if (scall != MAT_INITIAL_MATRIX) {
5109     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5110     aloc[0] = *A_loc;
5111   }
5112   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5113   if (!col) { /* attach global id of condensed columns */
5114     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5115   }
5116   *A_loc = aloc[0];
5117   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5118   if (!row) {
5119     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5120   }
5121   if (!col) {
5122     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5123   }
5124   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5125   PetscFunctionReturn(0);
5126 }
5127 
5128 /*@C
5129     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local portion of A
5130 
5131     Collective on Mat
5132 
5133    Input Parameters:
5134 +    A,B - the matrices in mpiaij format
5135 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5136 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5137 
5138    Output Parameter:
5139 +    rowb, colb - index sets of rows and columns of B to extract
5140 -    B_seq - the sequential matrix generated
5141 
5142     Level: developer
5143 
5144 @*/
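/*
   A minimal usage sketch, assuming A and B are assembled MATMPIAIJ matrices with compatible layouts:

     IS  rowb = NULL,colb = NULL;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ... use Bseq; later, after the numerical values of B change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ... destroy Bseq, rowb, and colb when they are no longer needed ...

   For MAT_REUSE_MATRIX the index sets returned by the initial call must be passed back in, as the
   error check at the top of the reuse branch below enforces.
*/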
5145 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5146 {
5147   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5148   PetscErrorCode ierr;
5149   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5150   IS             isrowb,iscolb;
5151   Mat            *bseq=NULL;
5152 
5153   PetscFunctionBegin;
5154   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5155     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5156   }
5157   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5158 
5159   if (scall == MAT_INITIAL_MATRIX) {
5160     start = A->cmap->rstart;
5161     cmap  = a->garray;
5162     nzA   = a->A->cmap->n;
5163     nzB   = a->B->cmap->n;
5164     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5165     ncols = 0;
5166     for (i=0; i<nzB; i++) {  /* row < local row index */
5167       if (cmap[i] < start) idx[ncols++] = cmap[i];
5168       else break;
5169     }
5170     imark = i;
5171     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5172     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5173     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5174     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5175   } else {
5176     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5177     isrowb  = *rowb; iscolb = *colb;
5178     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5179     bseq[0] = *B_seq;
5180   }
5181   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5182   *B_seq = bseq[0];
5183   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5184   if (!rowb) {
5185     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5186   } else {
5187     *rowb = isrowb;
5188   }
5189   if (!colb) {
5190     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5191   } else {
5192     *colb = iscolb;
5193   }
5194   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5195   PetscFunctionReturn(0);
5196 }
5197 
5198 /*
5199     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5200     of the OFF-DIAGONAL portion of local A
5201 
5202     Collective on Mat
5203 
5204    Input Parameters:
5205 +    A,B - the matrices in mpiaij format
5206 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5207 
5208    Output Parameter:
5209 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5210 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5211 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5212 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5213 
5214     Level: developer
5215 
5216 */
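/*
   A minimal usage sketch, assuming A and B are assembled MATMPIAIJ matrices: a caller that rebuilds
   B_oth repeatedly (for example inside a matrix-matrix product) can keep the communication buffers
   from the first call and hand them back for reuse:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... the numerical values of B change ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... eventually free the saved buffers and destroy B_oth ...

   Passing NULL for startsj_s or bufa_ptr forces the MAT_INITIAL_MATRIX path, as the code below does
   explicitly before dispatching on scall.
*/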
5217 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5218 {
5219   VecScatter_MPI_General *gen_to,*gen_from;
5220   PetscErrorCode         ierr;
5221   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5222   Mat_SeqAIJ             *b_oth;
5223   VecScatter             ctx;
5224   MPI_Comm               comm;
5225   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5226   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5227   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5228   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5229   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5230   MPI_Request            *rwaits = NULL,*swaits = NULL;
5231   MPI_Status             *sstatus,rstatus;
5232   PetscMPIInt            jj,size;
5233   VecScatterType         type;
5234   PetscBool              mpi1;
5235 
5236   PetscFunctionBegin;
5237   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5239 
5240   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5241     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5242   }
5243   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5244   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5245 
5246   if (size == 1) {
5247     if (startsj_s) *startsj_s = NULL;
5248     if (bufa_ptr)  *bufa_ptr  = NULL;
5249     *B_oth    = NULL;
5250     PetscFunctionReturn(0);
5251   }
5252 
5253   ctx = a->Mvctx;
5254   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5255   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5256   if (!mpi1) {
5257     /* a->Mvctx is not of type MPI1; the Mat-Mat ops used here are only implemented for MPI1 scatters,
5258      thus create a->Mvctx_mpi1 */
5259     if (!a->Mvctx_mpi1) {
5260       a->Mvctx_mpi1_flg = PETSC_TRUE;
5261       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5262     }
5263     ctx = a->Mvctx_mpi1;
5264   }
5265   tag = ((PetscObject)ctx)->tag;
5266 
5267   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5268   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5269   nrecvs   = gen_from->n;
5270   nsends   = gen_to->n;
5271 
5272   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5273   srow    = gen_to->indices;    /* local row index to be sent */
5274   sstarts = gen_to->starts;
5275   sprocs  = gen_to->procs;
5276   sstatus = gen_to->sstatus;
5277   sbs     = gen_to->bs;
5278   rstarts = gen_from->starts;
5279   rprocs  = gen_from->procs;
5280   rbs     = gen_from->bs;
5281 
5282   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5283   if (scall == MAT_INITIAL_MATRIX) {
5284     /* i-array */
5285     /*---------*/
5286     /*  post receives */
5287     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5288     for (i=0; i<nrecvs; i++) {
5289       rowlen = rvalues + rstarts[i]*rbs;
5290       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5291       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5292     }
5293 
5294     /* pack the outgoing message */
5295     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5296 
5297     sstartsj[0] = 0;
5298     rstartsj[0] = 0;
5299     len         = 0; /* total length of j or a array to be sent */
5300     k           = 0;
5301     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5302     for (i=0; i<nsends; i++) {
5303       rowlen = svalues + sstarts[i]*sbs;
5304       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5305       for (j=0; j<nrows; j++) {
5306         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5307         for (l=0; l<sbs; l++) {
5308           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5309 
5310           rowlen[j*sbs+l] = ncols;
5311 
5312           len += ncols;
5313           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5314         }
5315         k++;
5316       }
5317       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5318 
5319       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5320     }
5321     /* recvs and sends of i-array are completed */
5322     i = nrecvs;
5323     while (i--) {
5324       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5325     }
5326     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5327     ierr = PetscFree(svalues);CHKERRQ(ierr);
5328 
5329     /* allocate buffers for sending j and a arrays */
5330     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5331     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5332 
5333     /* create i-array of B_oth */
5334     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5335 
5336     b_othi[0] = 0;
5337     len       = 0; /* total length of j or a array to be received */
5338     k         = 0;
5339     for (i=0; i<nrecvs; i++) {
5340       rowlen = rvalues + rstarts[i]*rbs;
5341       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5342       for (j=0; j<nrows; j++) {
5343         b_othi[k+1] = b_othi[k] + rowlen[j];
5344         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5345         k++;
5346       }
5347       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5348     }
5349     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5350 
5351     /* allocate space for j and a arrays of B_oth */
5352     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5353     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5354 
5355     /* j-array */
5356     /*---------*/
5357     /*  post receives of j-array */
5358     for (i=0; i<nrecvs; i++) {
5359       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5360       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5361     }
5362 
5363     /* pack the outgoing message j-array */
5364     k = 0;
5365     for (i=0; i<nsends; i++) {
5366       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5367       bufJ  = bufj+sstartsj[i];
5368       for (j=0; j<nrows; j++) {
5369         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5370         for (ll=0; ll<sbs; ll++) {
5371           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5372           for (l=0; l<ncols; l++) {
5373             *bufJ++ = cols[l];
5374           }
5375           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5376         }
5377       }
5378       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5379     }
5380 
5381     /* recvs and sends of j-array are completed */
5382     i = nrecvs;
5383     while (i--) {
5384       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5385     }
5386     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5387   } else if (scall == MAT_REUSE_MATRIX) {
5388     sstartsj = *startsj_s;
5389     rstartsj = *startsj_r;
5390     bufa     = *bufa_ptr;
5391     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5392     b_otha   = b_oth->a;
5393   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5394 
5395   /* a-array */
5396   /*---------*/
5397   /*  post receives of a-array */
5398   for (i=0; i<nrecvs; i++) {
5399     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5400     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5401   }
5402 
5403   /* pack the outgoing message a-array */
5404   k = 0;
5405   for (i=0; i<nsends; i++) {
5406     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5407     bufA  = bufa+sstartsj[i];
5408     for (j=0; j<nrows; j++) {
5409       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5410       for (ll=0; ll<sbs; ll++) {
5411         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5412         for (l=0; l<ncols; l++) {
5413           *bufA++ = vals[l];
5414         }
5415         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5416       }
5417     }
5418     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5419   }
5420   /* recvs and sends of a-array are completed */
5421   i = nrecvs;
5422   while (i--) {
5423     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5424   }
5425   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5426   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5427 
5428   if (scall == MAT_INITIAL_MATRIX) {
5429     /* put together the new matrix */
5430     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5431 
5432     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5433     /* Since these are PETSc arrays, change flags to free them as necessary. */
5434     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5435     b_oth->free_a  = PETSC_TRUE;
5436     b_oth->free_ij = PETSC_TRUE;
5437     b_oth->nonew   = 0;
5438 
5439     ierr = PetscFree(bufj);CHKERRQ(ierr);
5440     if (!startsj_s || !bufa_ptr) {
5441       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5442       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5443     } else {
5444       *startsj_s = sstartsj;
5445       *startsj_r = rstartsj;
5446       *bufa_ptr  = bufa;
5447     }
5448   }
5449   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5450   PetscFunctionReturn(0);
5451 }
5452 
5453 /*@C
5454   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5455 
5456   Not Collective
5457 
5458   Input Parameters:
5459 . A - The matrix in mpiaij format
5460 
5461   Output Parameter:
5462 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5463 . colmap - A map from global column index to local index into lvec
5464 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5465 
5466   Level: developer
5467 
5468 @*/
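/*
   A minimal usage sketch; note that the type of colmap depends on whether PETSc was configured with
   PETSC_USE_CTABLE:

     Vec        lvec;
     VecScatter sct;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);

   The returned objects are internal members of the matrix; they should not be destroyed by the caller.
*/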
5469 #if defined(PETSC_USE_CTABLE)
5470 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5471 #else
5472 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5473 #endif
5474 {
5475   Mat_MPIAIJ *a;
5476 
5477   PetscFunctionBegin;
5478   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5479   PetscValidPointer(lvec, 2);
5480   PetscValidPointer(colmap, 3);
5481   PetscValidPointer(multScatter, 4);
5482   a = (Mat_MPIAIJ*) A->data;
5483   if (lvec) *lvec = a->lvec;
5484   if (colmap) *colmap = a->colmap;
5485   if (multScatter) *multScatter = a->Mvctx;
5486   PetscFunctionReturn(0);
5487 }
5488 
5489 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5490 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5491 #if defined(PETSC_HAVE_MKL_SPARSE)
5492 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5493 #endif
5494 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5495 #if defined(PETSC_HAVE_ELEMENTAL)
5496 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5497 #endif
5498 #if defined(PETSC_HAVE_HYPRE)
5499 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5500 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5501 #endif
5502 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5503 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5504 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5505 
5506 /*
5507     Computes C = A*B as (B'*A')' since computing the MPIDense-times-MPIAIJ product A*B directly is untenable
5508 
5509                n                       p                          p
5510         (              )       (              )         (                  )
5511       m (      A       )  *  n (       B      )   =   m (         C        )
5512         (              )       (              )         (                  )
5513 
5514 */
5515 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5516 {
5517   PetscErrorCode ierr;
5518   Mat            At,Bt,Ct;
5519 
5520   PetscFunctionBegin;
5521   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5522   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5523   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5524   ierr = MatDestroy(&At);CHKERRQ(ierr);
5525   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5526   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5527   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5528   PetscFunctionReturn(0);
5529 }
5530 
5531 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5532 {
5533   PetscErrorCode ierr;
5534   PetscInt       m=A->rmap->n,n=B->cmap->n;
5535   Mat            Cmat;
5536 
5537   PetscFunctionBegin;
5538   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5539   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5540   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5541   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5542   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5543   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5544   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5545   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5546 
5547   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5548 
5549   *C = Cmat;
5550   PetscFunctionReturn(0);
5551 }
5552 
5553 /* ----------------------------------------------------------------*/
5554 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5555 {
5556   PetscErrorCode ierr;
5557 
5558   PetscFunctionBegin;
5559   if (scall == MAT_INITIAL_MATRIX) {
5560     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5561     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5562     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5563   }
5564   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5565   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5566   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5567   PetscFunctionReturn(0);
5568 }
5569 
5570 /*MC
5571    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5572 
5573    Options Database Keys:
5574 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5575 
5576   Level: beginner
5577 
5578 .seealso: MatCreateAIJ()
5579 M*/
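/*
   A minimal sketch of selecting this type explicitly (rather than via -mat_type mpiaij and
   MatSetFromOptions()); M and N are assumed global sizes and the preallocation counts are placeholders:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/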
5580 
5581 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5582 {
5583   Mat_MPIAIJ     *b;
5584   PetscErrorCode ierr;
5585   PetscMPIInt    size;
5586 
5587   PetscFunctionBegin;
5588   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5589 
5590   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5591   B->data       = (void*)b;
5592   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5593   B->assembled  = PETSC_FALSE;
5594   B->insertmode = NOT_SET_VALUES;
5595   b->size       = size;
5596 
5597   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5598 
5599   /* build cache for off array entries formed */
5600   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5601 
5602   b->donotstash  = PETSC_FALSE;
5603   b->colmap      = 0;
5604   b->garray      = 0;
5605   b->roworiented = PETSC_TRUE;
5606 
5607   /* stuff used for matrix vector multiply */
5608   b->lvec  = NULL;
5609   b->Mvctx = NULL;
5610 
5611   /* stuff for MatGetRow() */
5612   b->rowindices   = 0;
5613   b->rowvalues    = 0;
5614   b->getrowactive = PETSC_FALSE;
5615 
5616   /* flexible pointer used in CUSP/CUSPARSE classes */
5617   b->spptr = NULL;
5618 
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5627   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5628 #if defined(PETSC_HAVE_MKL_SPARSE)
5629   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5630 #endif
5631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5632   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5633 #if defined(PETSC_HAVE_ELEMENTAL)
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5635 #endif
5636 #if defined(PETSC_HAVE_HYPRE)
5637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5638 #endif
5639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5644 #if defined(PETSC_HAVE_HYPRE)
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5646 #endif
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5649   PetscFunctionReturn(0);
5650 }
5651 
5652 /*@C
5653      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5654          and "off-diagonal" part of the matrix in CSR format.
5655 
5656    Collective on MPI_Comm
5657 
5658    Input Parameters:
5659 +  comm - MPI communicator
5660 .  m - number of local rows (Cannot be PETSC_DECIDE)
5661 .  n - This value should be the same as the local size used in creating the
5662        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5663        calculated if N is given). For square matrices n is almost always m.
5664 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5665 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5666 .   i - row indices for "diagonal" portion of matrix
5667 .   j - column indices
5668 .   a - matrix values
5669 .   oi - row indices for "off-diagonal" portion of matrix
5670 .   oj - column indices
5671 -   oa - matrix values
5672 
5673    Output Parameter:
5674 .   mat - the matrix
5675 
5676    Level: advanced
5677 
5678    Notes:
5679        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5680        must free the arrays once the matrix has been destroyed and not before.
5681 
5682        The i and j indices are 0 based
5683 
5684        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5685 
5686        This sets local rows and cannot be used to set off-processor values.
5687 
5688        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5689        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5690        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5691        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5692        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5693        communication if it is known that only local entries will be set.
5694 
5695 .keywords: matrix, aij, compressed row, sparse, parallel
5696 
5697 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5698           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5699 @*/
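/*
   A minimal usage sketch, assuming at least two MPI ranks: each rank owns one row and one column,
   with 2 on the diagonal and -1 coupling to the next rank's column (all values and sizes below are
   placeholders chosen for the illustration):

     PetscInt    i[2]  = {0,1}, j[1]  = {0};       j[] holds local column indices of the diagonal block
     PetscInt    oi[2] = {0,1}, oj[1];             oj[] holds global column indices of the off-diagonal block
     PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
     PetscMPIInt rank,size;
     Mat         A;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr  = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     oj[0] = (rank+1)%size;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; the arrays must remain valid until A has been destroyed ...
     ierr  = MatDestroy(&A);CHKERRQ(ierr);
*/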
5700 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5701 {
5702   PetscErrorCode ierr;
5703   Mat_MPIAIJ     *maij;
5704 
5705   PetscFunctionBegin;
5706   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5707   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5708   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5709   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5710   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5711   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5712   maij = (Mat_MPIAIJ*) (*mat)->data;
5713 
5714   (*mat)->preallocated = PETSC_TRUE;
5715 
5716   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5717   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5718 
5719   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5720   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5721 
5722   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5723   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5724   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5725   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726 
5727   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5728   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5729   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5730   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5731   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5732   PetscFunctionReturn(0);
5733 }
5734 
5735 /*
5736     Special version for direct calls from Fortran
5737 */
5738 #include <petsc/private/fortranimpl.h>
5739 
5740 /* Change these macros so can be used in void function */
5741 #undef CHKERRQ
5742 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5743 #undef SETERRQ2
5744 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5745 #undef SETERRQ3
5746 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5747 #undef SETERRQ
5748 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5749 
5750 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5751 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5752 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5753 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5754 #else
5755 #endif
5756 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5757 {
5758   Mat            mat  = *mmat;
5759   PetscInt       m    = *mm, n = *mn;
5760   InsertMode     addv = *maddv;
5761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5762   PetscScalar    value;
5763   PetscErrorCode ierr;
5764 
5765   MatCheckPreallocated(mat,1);
5766   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5767 
5768 #if defined(PETSC_USE_DEBUG)
5769   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5770 #endif
5771   {
5772     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5773     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5774     PetscBool roworiented = aij->roworiented;
5775 
5776     /* Some Variables required in the macro */
5777     Mat        A                 = aij->A;
5778     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5779     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5780     MatScalar  *aa               = a->a;
5781     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5782     Mat        B                 = aij->B;
5783     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5784     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5785     MatScalar  *ba               = b->a;
5786 
5787     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5788     PetscInt  nonew = a->nonew;
5789     MatScalar *ap1,*ap2;
5790 
5791     PetscFunctionBegin;
5792     for (i=0; i<m; i++) {
5793       if (im[i] < 0) continue;
5794 #if defined(PETSC_USE_DEBUG)
5795       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5796 #endif
5797       if (im[i] >= rstart && im[i] < rend) {
5798         row      = im[i] - rstart;
5799         lastcol1 = -1;
5800         rp1      = aj + ai[row];
5801         ap1      = aa + ai[row];
5802         rmax1    = aimax[row];
5803         nrow1    = ailen[row];
5804         low1     = 0;
5805         high1    = nrow1;
5806         lastcol2 = -1;
5807         rp2      = bj + bi[row];
5808         ap2      = ba + bi[row];
5809         rmax2    = bimax[row];
5810         nrow2    = bilen[row];
5811         low2     = 0;
5812         high2    = nrow2;
5813 
5814         for (j=0; j<n; j++) {
5815           if (roworiented) value = v[i*n+j];
5816           else value = v[i+j*m];
5817           if (in[j] >= cstart && in[j] < cend) {
5818             col = in[j] - cstart;
5819             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5820             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5821           } else if (in[j] < 0) continue;
5822 #if defined(PETSC_USE_DEBUG)
5823           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5824           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5825 #endif
5826           else {
5827             if (mat->was_assembled) {
5828               if (!aij->colmap) {
5829                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5830               }
5831 #if defined(PETSC_USE_CTABLE)
5832               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5833               col--;
5834 #else
5835               col = aij->colmap[in[j]] - 1;
5836 #endif
5837               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5838               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5839                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5840                 col  =  in[j];
5841                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5842                 B     = aij->B;
5843                 b     = (Mat_SeqAIJ*)B->data;
5844                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5845                 rp2   = bj + bi[row];
5846                 ap2   = ba + bi[row];
5847                 rmax2 = bimax[row];
5848                 nrow2 = bilen[row];
5849                 low2  = 0;
5850                 high2 = nrow2;
5851                 bm    = aij->B->rmap->n;
5852                 ba    = b->a;
5853               }
5854             } else col = in[j];
5855             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5856           }
5857         }
5858       } else if (!aij->donotstash) {
5859         if (roworiented) {
5860           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5861         } else {
5862           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5863         }
5864       }
5865     }
5866   }
5867   PetscFunctionReturnVoid();
5868 }
5869 
5870